diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3483e2c968e6b..0b6a7be8e7d35 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -80,6 +80,7 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/DemandedBits.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopAnalysisManager.h" @@ -604,6 +605,29 @@ class InnerLoopVectorizer { /// count of the original loop for both main loop and epilogue vectorization. void setTripCount(Value *TC) { TripCount = TC; } + BasicBlock *getMiddleBlock() { return LoopMiddleBlock; } + + BasicBlock *getMinIterBlock() { + return LoopBypassBlocks.size() == 1 ? LoopBypassBlocks[0] : nullptr; + } + + bool isOriginalLoopDead() { + auto *MinIter = getMinIterBlock(); + if (!MinIter) + return false; + { + auto BI = cast(MinIter->getTerminator()); + if (!BI->isConditional() || + BI->getCondition() != + ConstantInt::get(BI->getCondition()->getType(), 0)) + return false; + } + auto *BI = cast(LoopMiddleBlock->getTerminator()); + return BI->isConditional() && + BI->getCondition() == + ConstantInt::get(BI->getCondition()->getType(), 1); + } + protected: friend class LoopVectorizationPlanner; @@ -10105,6 +10129,10 @@ bool LoopVectorizePass::processLoop(Loop *L) { bool DisableRuntimeUnroll = false; MDNode *OrigLoopID = L->getLoopID(); + + bool IsOriginalLoopDead = false; + BasicBlock *MiddleBlock = nullptr; + BasicBlock *MinIterBlock = nullptr; { using namespace ore; if (!VectorizeLoop) { @@ -10224,6 +10252,10 @@ bool LoopVectorizePass::processLoop(Loop *L) { LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false); ++LoopsVectorized; + IsOriginalLoopDead = LB.isOriginalLoopDead(); + MiddleBlock = LB.getMiddleBlock(); + MinIterBlock = LB.getMinIterBlock(); + // Add metadata to disable runtime unrolling a scalar loop when there // are no runtime checks about strides and memory. A scalar loop that is // rarely used is not worth unrolling. @@ -10251,7 +10283,36 @@ bool LoopVectorizePass::processLoop(Loop *L) { Hints.setAlreadyVectorized(); } - assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs())); + if (IsOriginalLoopDead) { + auto *Br = cast(MiddleBlock->getTerminator()); + for (PHINode &PN : L->getExitBlock()->phis()) { + for (unsigned I = 0; I != PN.getNumIncomingValues(); ++I) { + if (LI->getLoopFor(PN.getIncomingBlock(I)) != L) + continue; + PN.setIncomingValue(I, UndefValue::get(PN.getType())); + } + } + + formDedicatedExitBlocks(L, DT, LI, nullptr, true); + deleteDeadLoop(L, DT, SE, LI); + BasicBlock *PH = Br->getSuccessor(1); + BasicBlock *Exit = PH->getSingleSuccessor(); + + BranchInst::Create(Br->getSuccessor(0), MiddleBlock); + Br->eraseFromParent(); + + auto *Br2 = cast(MinIterBlock->getTerminator()); + BranchInst::Create(Br2->getSuccessor(1), MinIterBlock); + Br2->eraseFromParent(); + + LI->removeBlock(PH); + LI->removeBlock(Exit); + + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + DeleteDeadBlocks({PH, Exit}, &DTU); + } + + assert(!verifyFunction(*F, &dbgs())); return true; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 2b2742ca7ccbc..724284a8f1dea 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -8,7 +8,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-LABEL: @simple_memset( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 @@ -18,13 +18,13 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -32,27 +32,17 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP6]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP11]]) ; CHECK-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] -; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: while.end.loopexit: ; CHECK-NEXT: ret void ; @@ -76,7 +66,7 @@ define void @simple_memset_v4i32(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-LABEL: @simple_memset_v4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], 3 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 @@ -99,19 +89,9 @@ define void @simple_memset_v4i32(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX1]], i64 [[TMP2]]) ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] -; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: while.end.loopexit: ; CHECK-NEXT: ret void ; @@ -135,7 +115,7 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-LABEL: @simple_memcpy( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 @@ -145,44 +125,32 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[SRC:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[SRC:%.*]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_LOAD]], ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP15]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_LOAD]], ptr [[TMP16]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP6]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP11]]) ; CHECK-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP2]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: while.end.loopexit: ; CHECK-NEXT: ret void ; @@ -211,7 +179,7 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 @@ -222,52 +190,40 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 4 -; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 -; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP2]], [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 +; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP2]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP2]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP2]]) -; CHECK-NEXT: [[TMP13:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP14:%.*]] = add [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = mul [[TMP14]], shufflevector ( insertelement ( poison, i64 4, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP15]] -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 -; CHECK-NEXT: [[TMP18:%.*]] = mul i64 4, [[TMP17]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP18]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP16:%.*]] = add [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP16]], shufflevector ( insertelement ( poison, i64 4, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 +; CHECK-NEXT: [[TMP20:%.*]] = mul i64 4, [[TMP19]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP20]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[SRC:%.*]], [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP19]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DST:%.*]], [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_GATHER]], [[TMP20]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP22]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP12]]) +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[SRC:%.*]], [[VEC_IND]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP21]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[DST:%.*]], [[VEC_IND]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_GATHER]], [[TMP22]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP9]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP14]]) ; CHECK-NEXT: [[TMP23:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP24:%.*]] = extractelement [[TMP23]], i32 0 -; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP2]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 4 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: while.end.loopexit: ; CHECK-NEXT: ret void ; @@ -293,7 +249,7 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali ; CHECK-LABEL: @simple_gather_scatter( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 @@ -303,47 +259,33 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[IND:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[SRC:%.*]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[DST:%.*]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_GATHER]], [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[IND:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[SRC:%.*]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP15]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[DST:%.*]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_GATHER]], [[TMP16]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP6]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP11]]) ; CHECK-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[IND]], i64 [[INDEX]] -; CHECK-NEXT: [[IND_VAL:%.*]] = load i32, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[SRC]], i32 [[IND_VAL]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP2]], align 4 -; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i32, ptr [[DST]], i32 [[IND_VAL]] -; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP3]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: while.end.loopexit: ; CHECK-NEXT: ret void ; @@ -372,7 +314,7 @@ while.end.loopexit: ; preds = %while.body define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) #0 { ; CHECK-LABEL: @uniform_load( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 @@ -382,43 +324,32 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[N]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[SRC:%.*]], align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[SRC:%.*]], align 4 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP13]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP15]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP15]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP11]]) ; CHECK-NEXT: [[TMP16:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP17:%.*]] = extractelement [[TMP16]], i32 0 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[SRC]], align 4 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -447,7 +378,7 @@ for.end: ; preds = %for.body, %entry define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr noalias readonly %cond, i64 %n) #0 { ; CHECK-LABEL: @cond_uniform_load( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 @@ -457,13 +388,13 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[N]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[SRC:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -471,46 +402,28 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq [[WIDE_MASKED_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = xor [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP14]], zeroinitializer -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP15]], poison) -; CHECK-NEXT: [[TMP16:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or [[TMP15]], [[TMP16]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP16]], zeroinitializer, [[WIDE_MASKED_GATHER]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[PREDPHI]], ptr [[TMP19]], i32 4, [[TMP18]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP21]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq [[WIDE_MASKED_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = xor [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP16]], zeroinitializer +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP17]], poison) +; CHECK-NEXT: [[TMP18:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP15]], zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP18]], zeroinitializer, [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP20:%.*]] = or [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[PREDPHI]], ptr [[TMP21]], i32 4, [[TMP20]]) +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP6]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP11]]) + ; CHECK-NEXT: [[TMP22:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP23:%.*]] = extractelement [[TMP22]], i32 0 -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP24]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[SRC]], align 4 -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: -; CHECK-NEXT: [[VAL_0:%.*]] = phi i32 [ [[TMP25]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store i32 [[VAL_0]], ptr [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -547,7 +460,7 @@ for.end: ; preds = %for.inc, %entry define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n) #0 { ; CHECK-LABEL: @uniform_store( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 @@ -557,13 +470,13 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n) ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[N]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[DST:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -571,29 +484,18 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n) ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP14]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP11]]) ; CHECK-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: store i32 [[VAL]], ptr [[DST]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -619,7 +521,7 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-LABEL: @simple_fdiv( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 @@ -629,48 +531,34 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[SRC:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[DST:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP15:%.*]] = fdiv [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD2]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0( [[TMP15]], ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP17]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[SRC:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[DST:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[TMP13]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP15]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP16]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP17:%.*]] = fdiv [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD2]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0( [[TMP17]], ptr [[TMP16]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP6]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP11]]) ; CHECK-NEXT: [[TMP18:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP19:%.*]] = extractelement [[TMP18]], i32 0 -; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr float, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[VAL1:%.*]] = load float, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[VAL2:%.*]] = load float, ptr [[GEP2]], align 4 -; CHECK-NEXT: [[RES:%.*]] = fdiv float [[VAL1]], [[VAL2]] -; CHECK-NEXT: store float [[RES]], ptr [[GEP2]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: while.end.loopexit: ; CHECK-NEXT: ret void ; @@ -699,7 +587,7 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-LABEL: @simple_idiv( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 @@ -709,49 +597,35 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[UMAX]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[SRC:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD2]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP16:%.*]] = udiv [[WIDE_MASKED_LOAD]], [[TMP15]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP16]], ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP18]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[SRC:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP15]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP16]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP17:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD2]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP18:%.*]] = udiv [[WIDE_MASKED_LOAD]], [[TMP17]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP18]], ptr [[TMP16]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP6]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP11]]) ; CHECK-NEXT: [[TMP19:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP20:%.*]] = extractelement [[TMP19]], i32 0 -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr [[GEP2]], align 4 -; CHECK-NEXT: [[RES:%.*]] = udiv i32 [[VAL1]], [[VAL2]] -; CHECK-NEXT: store i32 [[RES]], ptr [[GEP2]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: while.end.loopexit: ; CHECK-NEXT: ret void ; @@ -786,20 +660,20 @@ define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX1]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 4 -; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP8]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 0 +; CHECK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP8]], align 4 +; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -812,7 +686,7 @@ define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 ; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: while.end.loopexit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll index 94f24fea3609c..2478f5272075b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll @@ -207,7 +207,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL_NO_RT_CHECK-LABEL: @simple_memset_tailfold( ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: entry: ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; DATA_AND_CONTROL_NO_RT_CHECK: vector.ph: ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 @@ -217,13 +217,13 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]] -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[UMAX]], [[TMP8]] +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[UMAX]], [[TMP8]] +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -231,27 +231,17 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL_NO_RT_CHECK: vector.body: ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP10]] -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]]) -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]] -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], 0 +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP12]] +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]]) +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP6]] +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP11]]) ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA_AND_CONTROL_NO_RT_CHECK: middle.block: -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; DATA_AND_CONTROL_NO_RT_CHECK: scalar.ph: -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; DATA_AND_CONTROL_NO_RT_CHECK: while.body: -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4 -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; DATA_AND_CONTROL_NO_RT_CHECK: while.end.loopexit: ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll index f53fd5bc8d15a..1a1c4d83ca195 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll @@ -8,7 +8,7 @@ define void @test0(ptr noalias %M3, ptr noalias %A, ptr noalias %B) { ; CHECK-LABEL: define void @test0 ; CHECK-SAME: (ptr noalias [[M3:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -49,25 +49,7 @@ define void @test0(ptr noalias %M3, ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_INC1286_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[IF_THEN1165_US:%.*]] -; CHECK: if.then1165.us: -; CHECK-NEXT: [[INDVARS_IV1783:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT1784:%.*]], [[IF_THEN1165_US]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV1783]] -; CHECK-NEXT: [[L_A:%.*]] = load i16, ptr [[GEP_A]], align 2 -; CHECK-NEXT: [[CONV1177_US:%.*]] = zext i16 [[L_A]] to i32 -; CHECK-NEXT: [[ADD1178_US:%.*]] = add nsw i32 [[CONV1177_US]], 10 -; CHECK-NEXT: [[CONV1179_US:%.*]] = trunc i32 [[ADD1178_US]] to i16 -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDVARS_IV1783]] -; CHECK-NEXT: [[L_B:%.*]] = load i64, ptr [[GEP_B]], align 8 -; CHECK-NEXT: [[IDXPROM1181_US:%.*]] = ashr exact i64 [[L_B]], 32 -; CHECK-NEXT: [[ARRAYIDX1185_US:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[IDXPROM1181_US]] -; CHECK-NEXT: store i16 [[CONV1179_US]], ptr [[ARRAYIDX1185_US]], align 2 -; CHECK-NEXT: [[INDVARS_IV_NEXT1784]] = add nuw nsw i64 [[INDVARS_IV1783]], 1 -; CHECK-NEXT: [[EXITCOND1785:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT1784]], 16 -; CHECK-NEXT: br i1 [[EXITCOND1785]], label [[FOR_INC1286_LOOPEXIT]], label [[IF_THEN1165_US]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br label [[FOR_INC1286_LOOPEXIT:%.*]] ; CHECK: for.inc1286.loopexit: ; CHECK-NEXT: ret void ; @@ -98,7 +80,7 @@ define void @test1(ptr noalias %M3, ptr noalias %A, ptr noalias %B, ptr noalias ; CHECK-LABEL: define void @test1 ; CHECK-SAME: (ptr noalias [[M3:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -141,28 +123,9 @@ define void @test1(ptr noalias %M3, ptr noalias %A, ptr noalias %B, ptr noalias ; CHECK-NEXT: store i16 [[TMP28]], ptr [[TMP24]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_INC1286_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[IF_THEN1165_US:%.*]] -; CHECK: if.then1165.us: -; CHECK-NEXT: [[INDVARS_IV1783:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT1784:%.*]], [[IF_THEN1165_US]] ] -; CHECK-NEXT: [[FPTR:%.*]] = load i32, ptr [[C]], align 4 -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV1783]] -; CHECK-NEXT: [[L_A:%.*]] = load i16, ptr [[GEP_A]], align 2 -; CHECK-NEXT: [[CONV1177_US:%.*]] = zext i16 [[L_A]] to i32 -; CHECK-NEXT: [[ADD1178_US:%.*]] = add nsw i32 [[CONV1177_US]], [[FPTR]] -; CHECK-NEXT: [[CONV1179_US:%.*]] = trunc i32 [[ADD1178_US]] to i16 -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDVARS_IV1783]] -; CHECK-NEXT: [[L_B:%.*]] = load i64, ptr [[GEP_B]], align 8 -; CHECK-NEXT: [[IDXPROM1181_US:%.*]] = ashr exact i64 [[L_B]], 32 -; CHECK-NEXT: [[ARRAYIDX1185_US:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[IDXPROM1181_US]] -; CHECK-NEXT: store i16 [[CONV1179_US]], ptr [[ARRAYIDX1185_US]], align 2 -; CHECK-NEXT: [[INDVARS_IV_NEXT1784]] = add nuw nsw i64 [[INDVARS_IV1783]], 1 -; CHECK-NEXT: [[EXITCOND1785:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT1784]], 16 -; CHECK-NEXT: br i1 [[EXITCOND1785]], label [[FOR_INC1286_LOOPEXIT]], label [[IF_THEN1165_US]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[FOR_INC1286_LOOPEXIT:%.*]] ; CHECK: for.inc1286.loopexit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll index bee5b397ecb47..5c031fa4a7292 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll @@ -81,7 +81,7 @@ attributes #0 = { "target-cpu"="knl" } ; ; FORCE-LABEL: @PR40816( ; FORCE-NEXT: entry: -; FORCE-NEXT: br i1 false, label {{%.*}}, label [[VECTOR_PH:%.*]] +; FORCE-NEXT: br label [[VECTOR_PH:%.*]] ; FORCE: vector.ph: ; FORCE-NEXT: br label [[VECTOR_BODY:%.*]] ; FORCE: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll index 61cae9c1b3f5d..9233cb0a9b0eb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -13,7 +13,7 @@ define void @f1() { ; CHECK-LABEL: @f1( ; CHECK-NEXT: bb1: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -27,20 +27,7 @@ define void @f1() { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[BB3:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[BB1:%.*]] ] -; CHECK-NEXT: br label [[BB2:%.*]] -; CHECK: bb2: -; CHECK-NEXT: [[C_1_0:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[_TMP9:%.*]], [[BB2]] ] -; CHECK-NEXT: [[_TMP1:%.*]] = zext i16 0 to i64 -; CHECK-NEXT: [[_TMP2:%.*]] = getelementptr [1 x %rec8], ptr @a, i16 0, i64 [[_TMP1]] -; CHECK-NEXT: [[_TMP6:%.*]] = sext i16 [[C_1_0]] to i64 -; CHECK-NEXT: [[_TMP7:%.*]] = getelementptr [2 x ptr], ptr @b, i16 0, i64 [[_TMP6]] -; CHECK-NEXT: store ptr [[_TMP2]], ptr [[_TMP7]], align 8 -; CHECK-NEXT: [[_TMP9]] = add nsw i16 [[C_1_0]], 1 -; CHECK-NEXT: [[_TMP11:%.*]] = icmp slt i16 [[_TMP9]], 2 -; CHECK-NEXT: br i1 [[_TMP11]], label [[BB2]], label [[BB3]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll index fd826ce454ed8..ea417ab54b6b4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll @@ -67,7 +67,7 @@ done: define double @sumIfVector(ptr nocapture readonly %arr) { ; SSE-LABEL: @sumIfVector( ; SSE-NEXT: entry: -; SSE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; SSE-NEXT: br label [[VECTOR_PH:%.*]] ; SSE: vector.ph: ; SSE-NEXT: br label [[VECTOR_BODY:%.*]] ; SSE: vector.body: @@ -86,35 +86,13 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; SSE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SSE: middle.block: ; SSE-NEXT: [[TMP7:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> [[PREDPHI]]) -; SSE-NEXT: br i1 true, label [[DONE:%.*]], label [[SCALAR_PH]] -; SSE: scalar.ph: -; SSE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; SSE-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] -; SSE-NEXT: br label [[LOOP:%.*]] -; SSE: loop: -; SSE-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[NEXT_ITER:%.*]] ] -; SSE-NEXT: [[TOT:%.*]] = phi double [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TOT_NEXT:%.*]], [[NEXT_ITER]] ] -; SSE-NEXT: [[ADDR:%.*]] = getelementptr double, ptr [[ARR]], i32 [[I]] -; SSE-NEXT: [[NEXTVAL:%.*]] = load double, ptr [[ADDR]], align 8 -; SSE-NEXT: [[TST:%.*]] = fcmp fast une double [[NEXTVAL]], 4.200000e+01 -; SSE-NEXT: br i1 [[TST]], label [[DO_ADD:%.*]], label [[NO_ADD:%.*]] -; SSE: do.add: -; SSE-NEXT: [[TOT_NEW:%.*]] = fadd fast double [[TOT]], [[NEXTVAL]] -; SSE-NEXT: br label [[NEXT_ITER]] -; SSE: no.add: -; SSE-NEXT: br label [[NEXT_ITER]] -; SSE: next.iter: -; SSE-NEXT: [[TOT_NEXT]] = phi double [ [[TOT]], [[NO_ADD]] ], [ [[TOT_NEW]], [[DO_ADD]] ] -; SSE-NEXT: [[I_NEXT]] = add i32 [[I]], 1 -; SSE-NEXT: [[AGAIN:%.*]] = icmp ult i32 [[I_NEXT]], 32 -; SSE-NEXT: br i1 [[AGAIN]], label [[LOOP]], label [[DONE]], !llvm.loop [[LOOP2:![0-9]+]] +; SSE-NEXT: br label [[DONE:%.*]] ; SSE: done: -; SSE-NEXT: [[TOT_NEXT_LCSSA:%.*]] = phi double [ [[TOT_NEXT]], [[NEXT_ITER]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] -; SSE-NEXT: ret double [[TOT_NEXT_LCSSA]] +; SSE-NEXT: ret double [[TMP7]] ; ; AVX-LABEL: @sumIfVector( ; AVX-NEXT: entry: -; AVX-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; AVX-NEXT: br label [[VECTOR_PH:%.*]] ; AVX: vector.ph: ; AVX-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX: vector.body: @@ -133,31 +111,9 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; AVX-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; AVX: middle.block: ; AVX-NEXT: [[TMP7:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[PREDPHI]]) -; AVX-NEXT: br i1 true, label [[DONE:%.*]], label [[SCALAR_PH]] -; AVX: scalar.ph: -; AVX-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; AVX-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] -; AVX-NEXT: br label [[LOOP:%.*]] -; AVX: loop: -; AVX-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[NEXT_ITER:%.*]] ] -; AVX-NEXT: [[TOT:%.*]] = phi double [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TOT_NEXT:%.*]], [[NEXT_ITER]] ] -; AVX-NEXT: [[ADDR:%.*]] = getelementptr double, ptr [[ARR]], i32 [[I]] -; AVX-NEXT: [[NEXTVAL:%.*]] = load double, ptr [[ADDR]], align 8 -; AVX-NEXT: [[TST:%.*]] = fcmp fast une double [[NEXTVAL]], 4.200000e+01 -; AVX-NEXT: br i1 [[TST]], label [[DO_ADD:%.*]], label [[NO_ADD:%.*]] -; AVX: do.add: -; AVX-NEXT: [[TOT_NEW:%.*]] = fadd fast double [[TOT]], [[NEXTVAL]] -; AVX-NEXT: br label [[NEXT_ITER]] -; AVX: no.add: -; AVX-NEXT: br label [[NEXT_ITER]] -; AVX: next.iter: -; AVX-NEXT: [[TOT_NEXT]] = phi double [ [[TOT]], [[NO_ADD]] ], [ [[TOT_NEW]], [[DO_ADD]] ] -; AVX-NEXT: [[I_NEXT]] = add i32 [[I]], 1 -; AVX-NEXT: [[AGAIN:%.*]] = icmp ult i32 [[I_NEXT]], 32 -; AVX-NEXT: br i1 [[AGAIN]], label [[LOOP]], label [[DONE]], !llvm.loop [[LOOP2:![0-9]+]] +; AVX-NEXT: br label [[DONE:%.*]] ; AVX: done: -; AVX-NEXT: [[TOT_NEXT_LCSSA:%.*]] = phi double [ [[TOT_NEXT]], [[NEXT_ITER]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] -; AVX-NEXT: ret double [[TOT_NEXT_LCSSA]] +; AVX-NEXT: ret double [[TMP7]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll index e64b02f00dfc1..7d060394ea6bc 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll @@ -8,7 +8,7 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) { ; SSE-LABEL: @foo( ; SSE-NEXT: entry: -; SSE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; SSE-NEXT: br label [[VECTOR_PH:%.*]] ; SSE: vector.ph: ; SSE-NEXT: br label [[VECTOR_BODY:%.*]] ; SSE: vector.body: @@ -34,17 +34,13 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) { ; SSE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; SSE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SSE: middle.block: -; SSE-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] -; SSE: scalar.ph: -; SSE-NEXT: br label [[FOR_BODY:%.*]] +; SSE-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; SSE: for.cond.cleanup: ; SSE-NEXT: ret void -; SSE: for.body: -; SSE-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; ; AVX1-LABEL: @foo( ; AVX1-NEXT: entry: -; AVX1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; AVX1-NEXT: br label [[VECTOR_PH:%.*]] ; AVX1: vector.ph: ; AVX1-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX1: vector.body: @@ -88,17 +84,13 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) { ; AVX1-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; AVX1-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; AVX1: middle.block: -; AVX1-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] -; AVX1: scalar.ph: -; AVX1-NEXT: br label [[FOR_BODY:%.*]] +; AVX1-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; AVX1: for.cond.cleanup: ; AVX1-NEXT: ret void -; AVX1: for.body: -; AVX1-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; ; AVX2-LABEL: @foo( ; AVX2-NEXT: entry: -; AVX2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; AVX2-NEXT: br label [[VECTOR_PH:%.*]] ; AVX2: vector.ph: ; AVX2-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX2: vector.body: @@ -142,13 +134,9 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) { ; AVX2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; AVX2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; AVX2: middle.block: -; AVX2-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] -; AVX2: scalar.ph: -; AVX2-NEXT: br label [[FOR_BODY:%.*]] +; AVX2-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; AVX2: for.cond.cleanup: ; AVX2-NEXT: ret void -; AVX2: for.body: -; AVX2-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; ; ATOM-LABEL: @foo( ; ATOM-NEXT: entry: diff --git a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll index e30230120c44c..d0ee13b49a796 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll @@ -206,7 +206,7 @@ exit: define void @test_tc_20(ptr noalias %src, ptr noalias %dst) { ; CHECK-LABEL: @test_tc_20( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -222,19 +222,7 @@ define void @test_tc_20(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 20, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[LDADDR:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I]] -; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[LDADDR]], align 64 -; CHECK-NEXT: [[STADDR:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I]] -; CHECK-NEXT: store i8 [[VAL]], ptr [[STADDR]], align 64 -; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1 -; CHECK-NEXT: [[IS_NEXT:%.*]] = icmp ult i64 [[I_NEXT]], 20 -; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -273,7 +261,7 @@ define void @limit_main_loop_vf_to_avoid_dead_main_vector_loop(ptr noalias %src, ; CHECK-NEXT: store <8 x i8> [[STRIDED_VEC]], ptr [[TMP4]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -287,7 +275,7 @@ define void @limit_main_loop_vf_to_avoid_dead_main_vector_loop(ptr noalias %src, ; CHECK-NEXT: store i8 [[L]], ptr [[GEP_DST]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 32 -; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index 19ac52cf0d0b3..500887d5247d4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -19,7 +19,7 @@ define i32 @test_explicit_pred(i64 %len) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4096 x i32], align 4 ; CHECK-NEXT: call void @init(ptr [[ALLOCA]]) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[LEN:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer @@ -75,29 +75,9 @@ define i32 @test_explicit_pred(i64 %len) { ; CHECK-NEXT: [[BIN_RDX13:%.*]] = add <4 x i32> [[TMP22]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX14:%.*]] = add <4 x i32> [[TMP23]], [[BIN_RDX13]] ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX14]]) -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EARLYCND:%.*]] = icmp slt i64 [[IV]], [[LEN]] -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP25]] ; entry: %alloca = alloca [4096 x i32] @@ -129,7 +109,7 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4096 x i32], align 4 ; CHECK-NEXT: call void @init(ptr [[ALLOCA]]) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -228,36 +208,15 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP79]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 -; CHECK-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP77]], [[TMP76]] ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP78]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP79]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP81:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP81]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP81]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP81]] ; entry: %alloca = alloca [4096 x i32] @@ -293,7 +252,7 @@ define i32 @test_invariant_address(i64 %len, ptr %test_base) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4096 x i32], align 4 ; CHECK-NEXT: call void @init(ptr [[ALLOCA]]) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -412,35 +371,15 @@ define i32 @test_invariant_address(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP103]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP104:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 -; CHECK-NEXT: br i1 [[TMP104]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP104]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP101]], [[TMP100]] ; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP102]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <4 x i32> [[TMP103]], [[BIN_RDX7]] ; CHECK-NEXT: [[TMP105:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX8]]) -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP105]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ALLOCA]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP105]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP105]] ; entry: %alloca = alloca [4096 x i32] @@ -472,7 +411,7 @@ define i32 @test_step_narrower_than_access(i64 %len, ptr %test_base) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4096 x i32], align 4 ; CHECK-NEXT: call void @init(ptr [[ALLOCA]]) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -703,36 +642,15 @@ define i32 @test_step_narrower_than_access(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP151]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI36]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP152:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 -; CHECK-NEXT: br i1 [[TMP152]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP152]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP149]], [[TMP148]] ; CHECK-NEXT: [[BIN_RDX37:%.*]] = add <4 x i32> [[TMP150]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX38:%.*]] = add <4 x i32> [[TMP151]], [[BIN_RDX37]] ; CHECK-NEXT: [[TMP153:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX38]]) -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP153]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR_I16P:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR_I16P]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP153]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP153]] ; entry: %alloca = alloca [4096 x i32] @@ -869,7 +787,7 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) { ; CHECK-NEXT: [[TMP80]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP81:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP81]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP81]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP78]], [[TMP77]] ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP79]], [[BIN_RDX]] @@ -896,7 +814,7 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) { ; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] ; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] ; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], [[MIN_N]] -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: loop_exit: ; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP82]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] @@ -936,7 +854,7 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4096 x i32], align 4 ; CHECK-NEXT: call void @init(ptr [[ALLOCA]]) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -1036,36 +954,15 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP79]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], 3072 -; CHECK-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP77]], [[TMP76]] ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP78]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP79]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP81:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 1024, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP81]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP81]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP81]] ; entry: %alloca = alloca [4096 x i32] @@ -1148,7 +1045,7 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4096 x i32], align 4 ; CHECK-NEXT: call void @init(ptr [[ALLOCA]]) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -1284,36 +1181,15 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP119]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP120:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2048 -; CHECK-NEXT: br i1 [[TMP120]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP120]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP117]], [[TMP116]] ; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP118]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <4 x i32> [[TMP119]], [[BIN_RDX7]] ; CHECK-NEXT: [[TMP121:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX8]]) -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP121]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 2 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4093 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP121]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP121]] ; entry: %alloca = alloca [4096 x i32] @@ -1345,7 +1221,7 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [1024 x i32], align 4 ; CHECK-NEXT: call void @init(ptr [[ALLOCA]]) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -1444,36 +1320,15 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP79]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 -; CHECK-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP77]], [[TMP76]] ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP78]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP79]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP81:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP81]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP81]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP81]] ; entry: %alloca = alloca [1024 x i32] @@ -1505,7 +1360,7 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4095 x i32], align 4 ; CHECK-NEXT: call void @init(ptr [[ALLOCA]]) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -1604,36 +1459,15 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP79]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 -; CHECK-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP77]], [[TMP76]] ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP78]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP79]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP81:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP81]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP81]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP81]] ; entry: %alloca = alloca [4095 x i32] @@ -1665,7 +1499,7 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [16383 x i8], align 1 ; CHECK-NEXT: call void @init(ptr [[ALLOCA]]) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -1764,36 +1598,15 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP79]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 -; CHECK-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP77]], [[TMP76]] ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP78]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP79]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP81:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP81]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP81]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP81]] ; entry: %alloca = alloca [16383 x i8] @@ -1933,7 +1746,7 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP80]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP81:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP81]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP81]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP78]], [[TMP77]] ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP79]], [[BIN_RDX]] @@ -1960,7 +1773,7 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] ; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] ; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], [[MIN]] -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: loop_exit: ; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP82]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] @@ -2002,7 +1815,7 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCATION:%.*]] = call nonnull ptr @my_alloc(i32 16384) ; CHECK-NEXT: call void @init(ptr [[ALLOCATION]]) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -2101,36 +1914,15 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[TMP79]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 -; CHECK-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP77]], [[TMP76]] ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP78]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP79]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP81:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP81]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCATION]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP81]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP81]] ; entry: %allocation = call nonnull ptr @my_alloc(i32 16384) @@ -2163,7 +1955,7 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCATION:%.*]] = call nonnull ptr @my_array_alloc(i32 4096, i32 4) ; CHECK-NEXT: call void @init(ptr [[ALLOCATION]]) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -2262,36 +2054,15 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[TMP79]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 -; CHECK-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP77]], [[TMP76]] ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP78]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP79]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP81:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP81]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCATION]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP81]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP81]] ; entry: %allocation = call nonnull ptr @my_array_alloc(i32 4096, i32 4) @@ -2334,7 +2105,7 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) { ; CHECK-NEXT: call void @my_free(ptr [[ALLOCATION]]) ; CHECK-NEXT: br label [[PREHEADER]] ; CHECK: preheader: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -2433,36 +2204,15 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) { ; CHECK-NEXT: [[TMP79]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 -; CHECK-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP77]], [[TMP76]] ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP78]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP79]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP81:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[TMP81]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCATION]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP81]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP81]] ; entry: %allocation = call nonnull ptr @my_alloc(i32 16384) @@ -2542,7 +2292,7 @@ define i32 @test_stride_three(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP29]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP29]]) ; CHECK-NEXT: br i1 false, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] @@ -2565,7 +2315,7 @@ define i32 @test_stride_three(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] ; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] ; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 100 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]] +; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: loop_exit: ; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] @@ -2640,7 +2390,7 @@ define i32 @test_non_unit_stride_four(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP29]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 -; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP29]]) ; CHECK-NEXT: br i1 false, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] @@ -2663,7 +2413,7 @@ define i32 @test_non_unit_stride_four(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] ; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] ; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 100 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]] +; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: loop_exit: ; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] @@ -2738,7 +2488,7 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP29]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20 -; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP29]]) ; CHECK-NEXT: br i1 false, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] @@ -2761,7 +2511,7 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] ; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] ; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 100 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP35:![0-9]+]] +; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: loop_exit: ; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] @@ -2796,7 +2546,7 @@ define i32 @neg_test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [103 x i32], align 4 ; CHECK-NEXT: call void @init(ptr [[ALLOCA]]) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -2860,33 +2610,12 @@ define i32 @neg_test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) ; CHECK-NEXT: [[TMP37]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[INDEX_NEXT]], 52 -; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP37]]) -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 104, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 2 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 100 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP37:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP39]] ; entry: %alloca = alloca [103 x i32] @@ -3071,7 +2800,7 @@ define i32 @test_non_unit_stride_with_first_iteration_step_access(i64 %len, ptr ; CHECK-NEXT: [[TMP135]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP136:%.*]] = icmp eq i64 [[INDEX_NEXT]], 144 -; CHECK-NEXT: br i1 [[TMP136]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP136]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP133]], [[TMP132]] ; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP134]], [[BIN_RDX]] @@ -3097,7 +2826,7 @@ define i32 @test_non_unit_stride_with_first_iteration_step_access(i64 %len, ptr ; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] ; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] ; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 300 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP39:![0-9]+]] +; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: loop_exit: ; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP137]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll index cc36c2ba6f758..edf6d5dc0c99f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll @@ -1178,7 +1178,7 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b ; ; O1VEC2-LABEL: @nopragma( ; O1VEC2-NEXT: entry: -; O1VEC2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; O1VEC2-NEXT: br label [[VECTOR_PH:%.*]] ; O1VEC2: vector.ph: ; O1VEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0 ; O1VEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -1197,27 +1197,14 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b ; O1VEC2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; O1VEC2-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; O1VEC2: middle.block: -; O1VEC2-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; O1VEC2: scalar.ph: -; O1VEC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; O1VEC2-NEXT: br label [[FOR_BODY:%.*]] -; O1VEC2: for.body: -; O1VEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; O1VEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; O1VEC2-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; O1VEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[N]] -; O1VEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; O1VEC2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4 -; O1VEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; O1VEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; O1VEC2-NEXT: br label [[FOR_END:%.*]] ; O1VEC2: for.end: -; O1VEC2-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 -; O1VEC2-NEXT: ret i32 [[TMP8]] +; O1VEC2-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +; O1VEC2-NEXT: ret i32 [[TMP7]] ; ; OzVEC2-LABEL: @nopragma( ; OzVEC2-NEXT: entry: -; OzVEC2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; OzVEC2-NEXT: br label [[VECTOR_PH:%.*]] ; OzVEC2: vector.ph: ; OzVEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0 ; OzVEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -1236,23 +1223,10 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b ; OzVEC2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; OzVEC2-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; OzVEC2: middle.block: -; OzVEC2-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; OzVEC2: scalar.ph: -; OzVEC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; OzVEC2-NEXT: br label [[FOR_BODY:%.*]] -; OzVEC2: for.body: -; OzVEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; OzVEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; OzVEC2-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; OzVEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[N]] -; OzVEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; OzVEC2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4 -; OzVEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; OzVEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; OzVEC2-NEXT: br label [[FOR_END:%.*]] ; OzVEC2: for.end: -; OzVEC2-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 -; OzVEC2-NEXT: ret i32 [[TMP8]] +; OzVEC2-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +; OzVEC2-NEXT: ret i32 [[TMP7]] ; ; O3DIS-LABEL: @nopragma( ; O3DIS-NEXT: entry: @@ -1453,7 +1427,7 @@ define i32 @disabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b ; O1VEC2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4 ; O1VEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; O1VEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 -; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; O1VEC2: for.end: ; O1VEC2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 ; O1VEC2-NEXT: ret i32 [[TMP1]] @@ -1470,7 +1444,7 @@ define i32 @disabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b ; OzVEC2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4 ; OzVEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; OzVEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 -; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; OzVEC2: for.end: ; OzVEC2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 ; OzVEC2-NEXT: ret i32 [[TMP1]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll index ce6dd52d54557..cf42ac4aba233 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll @@ -12,7 +12,7 @@ target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128" define i32 @foo_optsize() #0 { ; CHECK-LABEL: @foo_optsize( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -32,26 +32,13 @@ define i32 @foo_optsize() #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP7]], 0 -; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 -; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 0 ; ; AUTOVF-LABEL: @foo_optsize( ; AUTOVF-NEXT: entry: -; AUTOVF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; AUTOVF-NEXT: br label [[VECTOR_PH:%.*]] ; AUTOVF: vector.ph: ; AUTOVF-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTOVF: vector.body: @@ -71,20 +58,7 @@ define i32 @foo_optsize() #0 { ; AUTOVF-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224 ; AUTOVF-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; AUTOVF: middle.block: -; AUTOVF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; AUTOVF: scalar.ph: -; AUTOVF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 224, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; AUTOVF-NEXT: br label [[FOR_BODY:%.*]] -; AUTOVF: for.body: -; AUTOVF-NEXT: [[I_08:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; AUTOVF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] -; AUTOVF-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; AUTOVF-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP7]], 0 -; AUTOVF-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 -; AUTOVF-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 -; AUTOVF-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 -; AUTOVF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 -; AUTOVF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; AUTOVF-NEXT: br label [[FOR_END:%.*]] ; AUTOVF: for.end: ; AUTOVF-NEXT: ret i32 0 ; @@ -112,7 +86,7 @@ attributes #0 = { optsize } define i32 @foo_minsize() #1 { ; CHECK-LABEL: @foo_minsize( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -130,28 +104,15 @@ define i32 @foo_minsize() #1 { ; CHECK-NEXT: call void @llvm.masked.store.v64i8.p0(<64 x i8> [[TMP5]], ptr [[TMP3]], i32 1, <64 x i1> [[TMP1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 64 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP7]], 0 -; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 -; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 0 ; ; AUTOVF-LABEL: @foo_minsize( ; AUTOVF-NEXT: entry: -; AUTOVF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; AUTOVF-NEXT: br label [[VECTOR_PH:%.*]] ; AUTOVF: vector.ph: ; AUTOVF-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTOVF: vector.body: @@ -169,22 +130,9 @@ define i32 @foo_minsize() #1 { ; AUTOVF-NEXT: call void @llvm.masked.store.v32i8.p0(<32 x i8> [[TMP5]], ptr [[TMP3]], i32 1, <32 x i1> [[TMP1]]) ; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 32 ; AUTOVF-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224 -; AUTOVF-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; AUTOVF-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; AUTOVF: middle.block: -; AUTOVF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; AUTOVF: scalar.ph: -; AUTOVF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 224, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; AUTOVF-NEXT: br label [[FOR_BODY:%.*]] -; AUTOVF: for.body: -; AUTOVF-NEXT: [[I_08:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; AUTOVF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] -; AUTOVF-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; AUTOVF-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP7]], 0 -; AUTOVF-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 -; AUTOVF-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 -; AUTOVF-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 -; AUTOVF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 -; AUTOVF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; AUTOVF-NEXT: br label [[FOR_END:%.*]] ; AUTOVF: for.end: ; AUTOVF-NEXT: ret i32 0 ; @@ -214,7 +162,7 @@ attributes #1 = { minsize } define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i32 %k) #2 { ; CHECK-LABEL: @scev4stride1( ; CHECK-NEXT: for.body.preheader: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[K:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer @@ -232,28 +180,15 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 64 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <64 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_07]], [[K]] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[MUL]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_07]] -; CHECK-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: ret void ; ; AUTOVF-LABEL: @scev4stride1( ; AUTOVF-NEXT: for.body.preheader: -; AUTOVF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; AUTOVF-NEXT: br label [[VECTOR_PH:%.*]] ; AUTOVF: vector.ph: ; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[K:%.*]], i64 0 ; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer @@ -271,22 +206,9 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon ; AUTOVF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; AUTOVF-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; AUTOVF-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 -; AUTOVF-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; AUTOVF-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; AUTOVF: middle.block: -; AUTOVF-NEXT: br i1 true, label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; AUTOVF: scalar.ph: -; AUTOVF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] -; AUTOVF-NEXT: br label [[FOR_BODY:%.*]] -; AUTOVF: for.body: -; AUTOVF-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; AUTOVF-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_07]], [[K]] -; AUTOVF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[MUL]] -; AUTOVF-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; AUTOVF-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_07]] -; AUTOVF-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX1]], align 4 -; AUTOVF-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1 -; AUTOVF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 256 -; AUTOVF-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; AUTOVF-NEXT: br label [[FOR_END_LOOPEXIT:%.*]] ; AUTOVF: for.end.loopexit: ; AUTOVF-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll index e527006aa5b56..fa8a1c79ea236 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll @@ -66,7 +66,7 @@ for.end: ; preds = %for.body define void @parallel_loop(ptr nocapture %a, ptr nocapture %b) nounwind uwtable { ; CHECK-LABEL: @parallel_loop( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -76,43 +76,39 @@ define void @parallel_loop(ptr nocapture %a, ptr nocapture %b) nounwind uwtable ; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP0:![0-9]+]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP8]] to i64 ; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP12]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP16]] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 0 -; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP1:![0-9]+]] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 1 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 0 +; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP1:![0-9]+]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 1 +; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP1]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 2 ; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP1]] -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 2 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3 ; CHECK-NEXT: store i32 [[TMP23]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP1]] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3 -; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP1]] -; CHECK-NEXT: [[TMP25:%.*]] = or disjoint i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP25]] -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: store <4 x i32> [[WIDE_LOAD1]], ptr [[TMP5]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[TMP24:%.*]] = or disjoint i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP24]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: store <4 x i32> [[WIDE_LOAD1]], ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 -; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 +; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -153,19 +149,19 @@ define void @mixed_metadata(ptr nocapture %a, ptr nocapture %b) nounwind uwtable ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP7]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP6]] ; CHECK-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM3]] -; CHECK-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +; CHECK-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP7]] -; CHECK-NEXT: store i32 [[TMP2]], ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP7]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP6]] +; CHECK-NEXT: store i32 [[TMP2]], ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP6]] ; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[INDVARS_IV_NEXT]], 4294967295 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[TMP3]], 512 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll b/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll index cc60359af2f8c..119c5fbdbbe6e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll @@ -12,7 +12,7 @@ target triple = "x86_64-apple-macosx10.8.0" define void @small_tc(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { ; CHECK-LABEL: @small_tc( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -29,21 +29,7 @@ define void @small_tc(ptr noalias nocapture %A, ptr noalias nocapture readonly % ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 8, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP6]], [[TMP7]] -; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll index c139afae9f230..f6ef635a254a9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll @@ -37,10 +37,9 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label %vector.body ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label %exit, label %scalar.ph +; CHECK-NEXT: br label %exit ; CHECK: exit: -; CHECK-NEXT: [[GEP_LCSSA:%.*]] = phi ptr [ %gep.src, %loop.latch ], [ [[TMP2]], %middle.block ] -; CHECK-NEXT: ret ptr [[GEP_LCSSA]] +; CHECK-NEXT: ret ptr [[TMP2]] ; entry: br label %loop.header diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll index 804bcb54b4ba5..c96e6d1584559 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll @@ -52,7 +52,7 @@ define float @reduction_sum_float_fastmath(i32 %n, ptr %array) { ; CHECK-NEXT: [[ENTRY_COND:%.*]] = icmp ne i32 0, 4096 ; CHECK-NEXT: br i1 [[ENTRY_COND]], label [[LOOP_PREHEADER:%.*]], label [[LOOP_EXIT:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -75,25 +75,11 @@ define float @reduction_sum_float_fastmath(i32 %n, ptr %array) { ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP7]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[BIN_RDX]]) -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[LOOP_PREHEADER]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi float [ [[SUM_INC:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ADDRESS:%.*]] = getelementptr float, ptr [[ARRAY]], i32 [[IDX]] -; CHECK-NEXT: [[VALUE:%.*]] = load float, ptr [[ADDRESS]], align 4 -; CHECK-NEXT: [[SUM_INC]] = fadd fast float [[SUM]], [[VALUE]] -; CHECK-NEXT: [[IDX_INC]] = add i32 [[IDX]], 1 -; CHECK-NEXT: [[BE_COND:%.*]] = icmp ne i32 [[IDX_INC]], 4096 -; CHECK-NEXT: br i1 [[BE_COND]], label [[LOOP]], label [[LOOP_EXIT_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT:%.*]] ; CHECK: loop.exit.loopexit: -; CHECK-NEXT: [[SUM_INC_LCSSA:%.*]] = phi float [ [[SUM_INC]], [[LOOP]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP_EXIT]] ; CHECK: loop.exit: -; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[SUM_INC_LCSSA]], [[LOOP_EXIT_LOOPEXIT]] ] +; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[TMP9]], [[LOOP_EXIT_LOOPEXIT]] ] ; CHECK-NEXT: ret float [[SUM_LCSSA]] ; entry: @@ -121,7 +107,7 @@ define float @reduction_sum_float_only_reassoc(i32 %n, ptr %array) { ; CHECK-NEXT: [[ENTRY_COND:%.*]] = icmp ne i32 0, 4096 ; CHECK-NEXT: br i1 [[ENTRY_COND]], label [[LOOP_PREHEADER:%.*]], label [[LOOP_EXIT:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -140,29 +126,15 @@ define float @reduction_sum_float_only_reassoc(i32 %n, ptr %array) { ; CHECK-NEXT: [[TMP7]] = fadd reassoc <4 x float> [[VEC_PHI1]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4096 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc <4 x float> [[TMP7]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[BIN_RDX]]) -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ -0.000000e+00, [[LOOP_PREHEADER]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi float [ [[SUM_INC:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ADDRESS:%.*]] = getelementptr float, ptr [[ARRAY]], i32 [[IDX]] -; CHECK-NEXT: [[VALUE:%.*]] = load float, ptr [[ADDRESS]], align 4 -; CHECK-NEXT: [[SUM_INC]] = fadd reassoc float [[SUM]], [[VALUE]] -; CHECK-NEXT: [[IDX_INC]] = add i32 [[IDX]], 1 -; CHECK-NEXT: [[BE_COND:%.*]] = icmp ne i32 [[IDX_INC]], 4096 -; CHECK-NEXT: br i1 [[BE_COND]], label [[LOOP]], label [[LOOP_EXIT_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT:%.*]] ; CHECK: loop.exit.loopexit: -; CHECK-NEXT: [[SUM_INC_LCSSA:%.*]] = phi float [ [[SUM_INC]], [[LOOP]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP_EXIT]] ; CHECK: loop.exit: -; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ -0.000000e+00, [[ENTRY:%.*]] ], [ [[SUM_INC_LCSSA]], [[LOOP_EXIT_LOOPEXIT]] ] +; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ -0.000000e+00, [[ENTRY:%.*]] ], [ [[TMP9]], [[LOOP_EXIT_LOOPEXIT]] ] ; CHECK-NEXT: ret float [[SUM_LCSSA]] ; entry: @@ -190,7 +162,7 @@ define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, ptr %array) ; CHECK-NEXT: [[ENTRY_COND:%.*]] = icmp ne i32 0, 4096 ; CHECK-NEXT: br i1 [[ENTRY_COND]], label [[LOOP_PREHEADER:%.*]], label [[LOOP_EXIT:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -209,29 +181,15 @@ define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, ptr %array) ; CHECK-NEXT: [[TMP7]] = fadd reassoc contract <4 x float> [[VEC_PHI1]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4096 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc contract <4 x float> [[TMP7]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = call reassoc contract float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[BIN_RDX]]) -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ -0.000000e+00, [[LOOP_PREHEADER]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi float [ [[SUM_INC:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ADDRESS:%.*]] = getelementptr float, ptr [[ARRAY]], i32 [[IDX]] -; CHECK-NEXT: [[VALUE:%.*]] = load float, ptr [[ADDRESS]], align 4 -; CHECK-NEXT: [[SUM_INC]] = fadd reassoc contract float [[SUM]], [[VALUE]] -; CHECK-NEXT: [[IDX_INC]] = add i32 [[IDX]], 1 -; CHECK-NEXT: [[BE_COND:%.*]] = icmp ne i32 [[IDX_INC]], 4096 -; CHECK-NEXT: br i1 [[BE_COND]], label [[LOOP]], label [[LOOP_EXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT:%.*]] ; CHECK: loop.exit.loopexit: -; CHECK-NEXT: [[SUM_INC_LCSSA:%.*]] = phi float [ [[SUM_INC]], [[LOOP]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP_EXIT]] ; CHECK: loop.exit: -; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ -0.000000e+00, [[ENTRY:%.*]] ], [ [[SUM_INC_LCSSA]], [[LOOP_EXIT_LOOPEXIT]] ] +; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ -0.000000e+00, [[ENTRY:%.*]] ], [ [[TMP9]], [[LOOP_EXIT_LOOPEXIT]] ] ; CHECK-NEXT: ret float [[SUM_LCSSA]] ; entry: @@ -287,7 +245,7 @@ define float @PR35538(ptr nocapture readonly %a, i32 %N) #0 { ; CHECK-NEXT: [[TMP9]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD2]], <4 x float> [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp nnan ninf nsz ogt <4 x float> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select nnan ninf nsz <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP8]], <4 x float> [[TMP9]] @@ -313,7 +271,7 @@ define float @PR35538(ptr nocapture readonly %a, i32 %N) #0 { ; CHECK-NEXT: [[MAX_0_]] = select i1 [[CMP1_INV]], float [[TMP12]], float [[MAX_013]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; entry: %cmp12 = icmp sgt i32 %N, 0 @@ -372,7 +330,7 @@ define float @PR35538_more_FMF(ptr nocapture readonly %a, i32 %N) #0 { ; CHECK-NEXT: [[TMP9]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD2]], <4 x float> [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp nnan ninf ogt <4 x float> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select nnan ninf <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP8]], <4 x float> [[TMP9]] @@ -398,7 +356,7 @@ define float @PR35538_more_FMF(ptr nocapture readonly %a, i32 %N) #0 { ; CHECK-NEXT: [[MAX_0_]] = select nnan ninf i1 [[CMP1_INV]], float [[TMP12]], float [[MAX_013]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; entry: %cmp12 = icmp sgt i32 %N, 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll index be83329d30fef..94a71e7baedd1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -23,7 +23,7 @@ target triple = "x86_64-apple-macosx10.8.0" ; We can optimize this test without a tail. define void @example1() optsize { ; CHECK-LABEL: @example1( -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -39,12 +39,8 @@ define void @example1() optsize { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[TMP7:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[TMP6:%.*]] ; CHECK: 6: -; CHECK-NEXT: br i1 poison, label [[TMP7]], label [[TMP6]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: 7: ; CHECK-NEXT: ret void ; br label %1 @@ -73,7 +69,7 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH5_PREHEADER:%.*]], label [[DOTPREHEADER:%.*]] ; CHECK: .lr.ph5.preheader: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[N]] to i64 ; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i64 [[TMP2]], 3 @@ -120,11 +116,9 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_PREHEADER_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH5:%.*]] +; CHECK-NEXT: br label [[DOT_PREHEADER_CRIT_EDGE:%.*]] ; CHECK: ..preheader_crit_edge: ; CHECK-NEXT: [[PHITMP:%.*]] = zext nneg i32 [[N]] to i64 ; CHECK-NEXT: br label [[DOTPREHEADER]] @@ -133,25 +127,25 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[N]], 0 ; CHECK-NEXT: br i1 [[TMP16]], label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH_PREHEADER:%.*]] ; CHECK: .lr.ph.preheader: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH8:%.*]], label [[VECTOR_PH9:%.*]] -; CHECK: vector.ph9: +; CHECK-NEXT: br label [[VECTOR_PH8:%.*]] +; CHECK: vector.ph8: ; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[N]] to i64 -; CHECK-NEXT: [[N_RND_UP10:%.*]] = add nuw nsw i64 [[TMP17]], 3 -; CHECK-NEXT: [[N_VEC12:%.*]] = and i64 [[N_RND_UP10]], 8589934588 -; CHECK-NEXT: [[TRIP_COUNT_MINUS_116:%.*]] = add nsw i64 [[TMP17]], -1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_116]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT17]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: br label [[VECTOR_BODY19:%.*]] -; CHECK: vector.body19: -; CHECK-NEXT: [[INDEX20:%.*]] = phi i64 [ 0, [[VECTOR_PH9]] ], [ [[INDEX_NEXT31:%.*]], [[PRED_STORE_CONTINUE30:%.*]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX20]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX20]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT22:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT21]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[VEC_IV:%.*]] = or disjoint <4 x i64> [[BROADCAST_SPLAT22]], -; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT18]] +; CHECK-NEXT: [[N_RND_UP9:%.*]] = add nuw nsw i64 [[TMP17]], 3 +; CHECK-NEXT: [[N_VEC11:%.*]] = and i64 [[N_RND_UP9]], 8589934588 +; CHECK-NEXT: [[TRIP_COUNT_MINUS_114:%.*]] = add nsw i64 [[TMP17]], -1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_114]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT15]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY17:%.*]] +; CHECK: vector.body17: +; CHECK-NEXT: [[INDEX18:%.*]] = phi i64 [ 0, [[VECTOR_PH8]] ], [ [[INDEX_NEXT29:%.*]], [[PRED_STORE_CONTINUE28:%.*]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX18]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX18]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT19]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[VEC_IV:%.*]] = or disjoint <4 x i64> [[BROADCAST_SPLAT20]], +; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT16]] ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP18]], i64 0 -; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]] -; CHECK: pred.store.if23: +; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]] +; CHECK: pred.store.if21: ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 [[OFFSET_IDX]] @@ -159,11 +153,11 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP23]], [[TMP21]] ; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]] -; CHECK: pred.store.continue24: +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]] +; CHECK: pred.store.continue22: ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP18]], i64 1 -; CHECK-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]] -; CHECK: pred.store.if25: +; CHECK-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]] +; CHECK: pred.store.if23: ; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 [[TMP27]] ; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 @@ -172,11 +166,11 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 [[TMP27]] ; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP31]], [[TMP29]] ; CHECK-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]] -; CHECK: pred.store.continue26: +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]] +; CHECK: pred.store.continue24: ; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP18]], i64 2 -; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]] -; CHECK: pred.store.if27: +; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]] +; CHECK: pred.store.if25: ; CHECK-NEXT: [[TMP35:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 [[TMP35]] ; CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 @@ -185,11 +179,11 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 [[TMP35]] ; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP39]], [[TMP37]] ; CHECK-NEXT: store i32 [[TMP41]], ptr [[TMP40]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]] -; CHECK: pred.store.continue28: +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]] +; CHECK: pred.store.continue26: ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i1> [[TMP18]], i64 3 -; CHECK-NEXT: br i1 [[TMP42]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30]] -; CHECK: pred.store.if29: +; CHECK-NEXT: br i1 [[TMP42]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28]] +; CHECK: pred.store.if27: ; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 [[TMP43]] ; CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 @@ -198,19 +192,13 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 [[TMP43]] ; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP47]], [[TMP45]] ; CHECK-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]] -; CHECK: pred.store.continue30: -; CHECK-NEXT: [[INDEX_NEXT31]] = add i64 [[INDEX20]], 4 -; CHECK-NEXT: [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT31]], [[N_VEC12]] -; CHECK-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK7:%.*]], label [[VECTOR_BODY19]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]] +; CHECK: pred.store.continue28: +; CHECK-NEXT: [[INDEX_NEXT29]] = add i64 [[INDEX18]], 4 +; CHECK-NEXT: [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT29]], [[N_VEC11]] +; CHECK-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK7:%.*]], label [[VECTOR_BODY17]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block7: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH8]] -; CHECK: scalar.ph8: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph5: -; CHECK-NEXT: br i1 poison, label [[DOT_PREHEADER_CRIT_EDGE]], label [[DOTLR_PH5]], !llvm.loop [[LOOP6:![0-9]+]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]] ; CHECK: ._crit_edge.loopexit: ; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] ; CHECK: ._crit_edge: @@ -263,7 +251,7 @@ define void @example3(i32 %n, ptr noalias nocapture %p, ptr noalias nocapture %q ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH_PREHEADER:%.*]] ; CHECK: .lr.ph.preheader: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[N]] to i64 ; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i64 [[TMP2]], 3 @@ -330,13 +318,9 @@ define void @example3(i32 %n, ptr noalias nocapture %p, ptr noalias nocapture %q ; CHECK: pred.store.continue19: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]] ; CHECK: ._crit_edge.loopexit: ; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] ; CHECK: ._crit_edge: @@ -405,7 +389,7 @@ define void @example23(ptr nocapture %src, ptr nocapture %dst) optsize { ; We CAN vectorize this example because the pointers are marked as noalias. define void @example23b(ptr noalias nocapture %src, ptr noalias nocapture %dst) optsize { ; CHECK-LABEL: @example23b( -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -420,14 +404,10 @@ define void @example23b(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[NEXT_GEP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[TMP7:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[TMP6:%.*]] ; CHECK: 6: -; CHECK-NEXT: br i1 poison, label [[TMP7]], label [[TMP6]], !llvm.loop [[LOOP11:![0-9]+]] -; CHECK: 7: ; CHECK-NEXT: ret void ; br label %1 @@ -453,7 +433,7 @@ define void @example23b(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; We CAN vectorize this example by folding the tail it entails. define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) optsize { ; CHECK-LABEL: @example23c( -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -522,14 +502,10 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK: pred.store.continue16: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[TMP34:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[TMP33:%.*]] ; CHECK: 33: -; CHECK-NEXT: br i1 poison, label [[TMP34]], label [[TMP33]], !llvm.loop [[LOOP13:![0-9]+]] -; CHECK: 34: ; CHECK-NEXT: ret void ; br label %1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll index 277016ca02b2f..3dd93465c2aab 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll @@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu" define dso_local void @tail_folding_enabled(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C) local_unnamed_addr #0 { ; CHECK-LABEL: @tail_folding_enabled( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -32,24 +32,9 @@ define dso_local void @tail_folding_enabled(ptr noalias nocapture %A, ptr noalia ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 432 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 432, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP10]] -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 430 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; entry: br label %for.body @@ -76,7 +61,7 @@ for.body: define dso_local void @tail_folding_disabled(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C) local_unnamed_addr #0 { ; CHECK-LABEL: @tail_folding_disabled( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -98,26 +83,11 @@ define dso_local void @tail_folding_disabled(ptr noalias nocapture %A, ptr noali ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP6]], ptr [[TMP8]], i32 4, <8 x i1> [[TMP1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 432 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 432, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP10]] -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 430 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; entry: br label %for.body @@ -155,7 +125,7 @@ define i32 @reduction_i32(ptr nocapture readonly %A, ptr nocapture readonly %B, ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP2]], 7 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8 @@ -183,30 +153,12 @@ define i32 @reduction_i32(ptr nocapture readonly %A, ptr nocapture readonly %B, ; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP4]], <8 x i32> [[TMP10]], <8 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP11]]) -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_0:%.*]] = phi i32 [ [[SUM_1:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDXA:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDXA]], align 4 -; CHECK-NEXT: [[ARRAYIDXB:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDXB]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP14]] -; CHECK-NEXT: [[SUM_1]] = add nuw nsw i32 [[ADD]], [[SUM_0]] -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_1]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_1_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP13]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll index c5c80c3ff6992..dc9f5099021ea 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll @@ -360,7 +360,7 @@ define i32 @test_count_bits(ptr %test_base) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4096 x i32], align 4 ; CHECK-NEXT: call void @init(ptr [[ALLOCA]]) -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -430,29 +430,9 @@ define i32 @test_count_bits(ptr %test_base) { ; CHECK-NEXT: [[BIN_RDX13:%.*]] = add <4 x i32> [[TMP38]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX14:%.*]] = add <4 x i32> [[TMP39]], [[BIN_RDX13]] ; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX14]]) -; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP41]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[BYTE:%.*]] = udiv i64 [[IV]], 8 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE]], i64 [[BYTE]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i8, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: [[BIT:%.*]] = urem i64 [[IV]], 8 -; CHECK-NEXT: [[BIT_TRUNC:%.*]] = trunc i64 [[BIT]] to i8 -; CHECK-NEXT: [[MASK:%.*]] = lshr i8 [[EARLYCND]], [[BIT_TRUNC]] -; CHECK-NEXT: [[TEST:%.*]] = and i8 [[MASK]], 1 -; CHECK-NEXT: [[VAL:%.*]] = zext i8 [[TEST]] to i32 -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LOOP]] ], [ [[TMP41]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP41]] ; entry: %alloca = alloca [4096 x i32] @@ -502,7 +482,7 @@ define i32 @uniform_load_global() { ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[VEC_PHI3]], [[BROADCAST_SPLAT9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP2]], [[TMP1]] ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP3]], [[BIN_RDX]] @@ -520,7 +500,7 @@ define i32 @uniform_load_global() { ; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[LOAD]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 4096 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: loopexit: ; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] @@ -563,7 +543,7 @@ define i32 @uniform_load_constexpr() { ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[VEC_PHI3]], [[BROADCAST_SPLAT9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP2]], [[TMP1]] ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP3]], [[BIN_RDX]] @@ -581,7 +561,7 @@ define i32 @uniform_load_constexpr() { ; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[LOAD]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 4096 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: loopexit: ; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll index 3c2ea22e799dc..7df218482d9d4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -21,7 +21,7 @@ target triple = "x86_64-unknown-linux" define void @vectorized(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { ; CHECK-LABEL: @vectorized( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -39,21 +39,7 @@ define void @vectorized(ptr noalias nocapture %A, ptr noalias nocapture readonly ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 20, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP7]], [[TMP8]] -; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 20 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -87,7 +73,7 @@ for.end: define void @vectorized1(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { ; CHECK-LABEL: @vectorized1( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -99,31 +85,17 @@ define void @vectorized1(ptr noalias nocapture %A, ptr noalias nocapture readonl ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x float> poison), !llvm.access.group [[ACC_GRP6:![0-9]+]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x float> poison), !llvm.access.group [[ACC_GRP5:![0-9]+]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP5]], i32 4, <8 x i1> [[TMP1]], <8 x float> poison), !llvm.access.group [[ACC_GRP6]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP5]], i32 4, <8 x i1> [[TMP1]], <8 x float> poison), !llvm.access.group [[ACC_GRP5]] ; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]] -; CHECK-NEXT: call void @llvm.masked.store.v8f32.p0(<8 x float> [[TMP6]], ptr [[TMP5]], i32 4, <8 x i1> [[TMP1]]), !llvm.access.group [[ACC_GRP6]] +; CHECK-NEXT: call void @llvm.masked.store.v8f32.p0(<8 x float> [[TMP6]], ptr [[TMP5]], i32 4, <8 x i1> [[TMP1]]), !llvm.access.group [[ACC_GRP5]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 24, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP6]] -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP8]], [[TMP9]] -; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP6]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 20 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -156,7 +128,7 @@ for.end: define void @vectorized2(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { ; CHECK-LABEL: @vectorized2( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -164,31 +136,17 @@ define void @vectorized2(ptr noalias nocapture %A, ptr noalias nocapture readonl ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP6]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x float>, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP6]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x float>, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP5]] ; CHECK-NEXT: [[TMP5:%.*]] = fadd fast <8 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; CHECK-NEXT: store <8 x float> [[TMP5]], ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP6]] +; CHECK-NEXT: store <8 x float> [[TMP5]], ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP5]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP6]] -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP7]], [[TMP8]] -; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP6]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 16 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll index 6e83cf612f82b..05a496c69bf8e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll @@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx13.0.0" define void @test_pr59090(ptr %l_out, ptr noalias %b) #0 { ; CHECK-LABEL: @test_pr59090( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -77,23 +77,7 @@ define void @test_pr59090(ptr %l_out, ptr noalias %b) #0 { ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10008 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10008, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_MUL:%.*]] = mul nuw i64 [[IV]], 6 -; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: store i8 [[L]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[ARRAYIDX77:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[IV_MUL]] -; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX77]], align 1, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[ADD_2:%.*]] = add i64 [[IV_MUL]], 2 -; CHECK-NEXT: [[ARRAYIDX97:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[ADD_2]] -; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX97]], align 1, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 10000 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll index 91355728133da..18c11967a5cb2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll @@ -56,7 +56,7 @@ define i32 @predicated_sdiv_masked_load(ptr %a, ptr %b, i32 %x, i1 %c) { ; ; SINK-GATHER-LABEL: @predicated_sdiv_masked_load( ; SINK-GATHER-NEXT: entry: -; SINK-GATHER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; SINK-GATHER-NEXT: br label [[VECTOR_PH:%.*]] ; SINK-GATHER: vector.ph: ; SINK-GATHER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i1> poison, i1 [[C:%.*]], i64 0 ; SINK-GATHER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT]], <8 x i1> poison, <8 x i32> zeroinitializer @@ -152,32 +152,9 @@ define i32 @predicated_sdiv_masked_load(ptr %a, ptr %b, i32 %x, i1 %c) { ; SINK-GATHER-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SINK-GATHER: middle.block: ; SINK-GATHER-NEXT: [[TMP49:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP47]]) -; SINK-GATHER-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; SINK-GATHER: scalar.ph: -; SINK-GATHER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; SINK-GATHER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ] -; SINK-GATHER-NEXT: br label [[FOR_BODY:%.*]] -; SINK-GATHER: for.body: -; SINK-GATHER-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ] -; SINK-GATHER-NEXT: [[R:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[T7:%.*]], [[FOR_INC]] ] -; SINK-GATHER-NEXT: [[T0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]] -; SINK-GATHER-NEXT: [[T1:%.*]] = load i32, ptr [[T0]], align 4 -; SINK-GATHER-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]] -; SINK-GATHER: if.then: -; SINK-GATHER-NEXT: [[T2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]] -; SINK-GATHER-NEXT: [[T3:%.*]] = load i32, ptr [[T2]], align 4 -; SINK-GATHER-NEXT: [[T4:%.*]] = sdiv i32 [[T3]], [[X]] -; SINK-GATHER-NEXT: [[T5:%.*]] = add nsw i32 [[T4]], [[T1]] -; SINK-GATHER-NEXT: br label [[FOR_INC]] -; SINK-GATHER: for.inc: -; SINK-GATHER-NEXT: [[T6:%.*]] = phi i32 [ [[T1]], [[FOR_BODY]] ], [ [[T5]], [[IF_THEN]] ] -; SINK-GATHER-NEXT: [[T7]] = add i32 [[R]], [[T6]] -; SINK-GATHER-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 -; SINK-GATHER-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], 10000 -; SINK-GATHER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; SINK-GATHER-NEXT: br label [[FOR_END:%.*]] ; SINK-GATHER: for.end: -; SINK-GATHER-NEXT: [[T8:%.*]] = phi i32 [ [[T7]], [[FOR_INC]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ] -; SINK-GATHER-NEXT: ret i32 [[T8]] +; SINK-GATHER-NEXT: ret i32 [[TMP49]] ; entry: br label %for.body @@ -409,7 +386,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; SINK-GATHER-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], ; SINK-GATHER-NEXT: [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SINK-GATHER-NEXT: br i1 [[TMP67]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; SINK-GATHER-NEXT: br i1 [[TMP67]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; SINK-GATHER: middle.block: ; SINK-GATHER-NEXT: [[TMP68:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP66]]) ; SINK-GATHER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] @@ -433,7 +410,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[T6]] = add i32 [[R]], [[T5]] ; SINK-GATHER-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; SINK-GATHER-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; SINK-GATHER-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP5:![0-9]+]] +; SINK-GATHER-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP4:![0-9]+]] ; SINK-GATHER: for.end: ; SINK-GATHER-NEXT: [[T7:%.*]] = phi i32 [ [[T6]], [[FOR_INC]] ], [ [[TMP68]], [[MIDDLE_BLOCK]] ] ; SINK-GATHER-NEXT: ret i32 [[T7]] diff --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll index 66153a002d0d0..034fe7c262177 100644 --- a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll +++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll @@ -4,7 +4,7 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK-LABEL: @test( ; CHECK-NEXT: bb: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i1> poison, i1 [[C_1:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT1]], <2 x i1> poison, <2 x i32> zeroinitializer @@ -36,33 +36,9 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK: middle.block: ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[PREDPHI5]], i32 1 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[PREDPHI7]]) -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 182, [[MIDDLE_BLOCK]] ], [ 6, [[BB:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 35902, [[BB]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[V_2:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[P_2:%.*]], [[LOOP_LATCH]] ] -; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_LATCH]], label [[BODY_1:%.*]] -; CHECK: body.1: -; CHECK-NEXT: [[V_2_ADD:%.*]] = add i32 [[V_2]], 10 -; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_LATCH]], label [[BODY_2:%.*]] -; CHECK: body.2: -; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[V_2_ADD]], 20 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A]], 1 -; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[XOR]] -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[P_1:%.*]] = phi i32 [ [[IV]], [[LOOP_HEADER]] ], [ 9, [[BODY_1]] ], [ 9, [[BODY_2]] ] -; CHECK-NEXT: [[P_2]] = phi i32 [ [[V_2]], [[LOOP_HEADER]] ], [ [[V_2_ADD]], [[BODY_1]] ], [ [[ADD_2]], [[BODY_2]] ] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp ult i32 [[IV]], 181 -; CHECK-NEXT: br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[E_1:%.*]] = phi i32 [ [[P_1]], [[LOOP_LATCH]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[E_2:%.*]] = phi i32 [ [[P_2]], [[LOOP_LATCH]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_1]], [[E_2]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[TMP9]], [[TMP10]] ; CHECK-NEXT: ret i32 [[RES]] ; bb: diff --git a/llvm/test/Transforms/LoopVectorize/pr66616.ll b/llvm/test/Transforms/LoopVectorize/pr66616.ll index 2fb7f88e5341e..390b42773d15d 100644 --- a/llvm/test/Transforms/LoopVectorize/pr66616.ll +++ b/llvm/test/Transforms/LoopVectorize/pr66616.ll @@ -7,7 +7,7 @@ define void @pr66616(ptr %ptr) { ; CHECK-LABEL: define void @pr66616( ; CHECK-SAME: ptr [[PTR:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -21,50 +21,39 @@ define void @pr66616(ptr %ptr) { ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 -; CHECK-NEXT: br i1 true, label [[PREHEADER:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP_1:%.*]] -; CHECK: loop.1: -; CHECK-NEXT: [[IV_1:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[LOOP_1]] ] -; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[PTR]], align 4 -; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[LOAD]], 1 -; CHECK-NEXT: [[INC]] = add i8 [[IV_1]], 1 -; CHECK-NEXT: [[COND1:%.*]] = icmp eq i8 [[INC]], 0 -; CHECK-NEXT: br i1 [[COND1]], label [[PREHEADER]], label [[LOOP_1]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br label [[PREHEADER:%.*]] ; CHECK: preheader: -; CHECK-NEXT: [[ADD3_LCSSA:%.*]] = phi i32 [ [[ADD3]], [[LOOP_1]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[TMP4:%.*]] = sub i32 0, [[ADD3_LCSSA]] +; CHECK-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP6]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH2:%.*]], label [[VECTOR_PH3:%.*]] -; CHECK: vector.ph3: +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH2:%.*]] +; CHECK: vector.ph2: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP6]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP6]], [[N_MOD_VF]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 -; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[ADD3_LCSSA]], [[DOTCAST]] -; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[N_VEC]] -; CHECK-NEXT: br label [[VECTOR_BODY7:%.*]] -; CHECK: vector.body7: -; CHECK-NEXT: [[INDEX8:%.*]] = phi i64 [ 0, [[VECTOR_PH3]] ], [ [[INDEX_NEXT9:%.*]], [[VECTOR_BODY7]] ] -; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX8]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY7]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[TMP3]], [[DOTCAST]] +; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[N_VEC]] +; CHECK-NEXT: br label [[VECTOR_BODY5:%.*]] +; CHECK: vector.body5: +; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ 0, [[VECTOR_PH2]] ], [ [[INDEX_NEXT7:%.*]], [[VECTOR_BODY5]] ] +; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX6]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY5]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block1: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH2]] -; CHECK: scalar.ph2: -; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK1]] ], [ [[ADD3_LCSSA]], [[PREHEADER]] ] -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK1]] ], [ [[PTR]], [[PREHEADER]] ] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK1]] ], [ [[TMP3]], [[PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], [[MIDDLE_BLOCK1]] ], [ [[PTR]], [[PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP_2:%.*]] ; CHECK: loop.2: -; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[IV_2_I:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH2]] ] -; CHECK-NEXT: [[IV_3:%.*]] = phi ptr [ [[IV_3_I:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH2]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[IV_2_I:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[IV_3:%.*]] = phi ptr [ [[IV_3_I:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[IV_2_I]] = add i32 [[IV_2]], 1 ; CHECK-NEXT: [[IV_3_I]] = getelementptr i8, ptr [[IV_3]], i64 1 ; CHECK-NEXT: [[COND2:%.*]] = icmp eq i32 [[IV_2]], 0 -; CHECK-NEXT: br i1 [[COND2]], label [[EXIT]], label [[LOOP_2]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[COND2]], label [[EXIT]], label [[LOOP_2]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll index 1dfd8d93d2cf1..aed3689f8dfe4 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll @@ -5,7 +5,7 @@ define i32 @reduction_smin(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: define i32 @reduction_smin ; CHECK-SAME: (ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -19,14 +19,9 @@ define i32 @reduction_smin(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret i32 [[RDX_MINMAX]] ; entry: br label %for.body @@ -52,7 +47,7 @@ define i32 @reduction_smin_select_ops_flipped(ptr nocapture %A, ptr nocapture %B ; CHECK-LABEL: define i32 @reduction_smin_select_ops_flipped ; CHECK-SAME: (ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -64,16 +59,11 @@ define i32 @reduction_smin_select_ops_flipped(ptr nocapture %A, ptr nocapture %B ; CHECK-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret i32 [[RDX_MINMAX]] ; entry: br label %for.body @@ -99,7 +89,7 @@ define i32 @reduction_smin_intrinsic(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: define i32 @reduction_smin_intrinsic ; CHECK-SAME: (ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -110,17 +100,12 @@ define i32 @reduction_smin_intrinsic(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP1]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP1]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP1]]) +; CHECK-NEXT: ret i32 [[TMP3]] ; entry: br label %for.body @@ -147,7 +132,7 @@ define i32 @reduction_umax(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: define i32 @reduction_umax ; CHECK-SAME: (ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -159,16 +144,11 @@ define i32 @reduction_umax(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP1]], i32 [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret i32 [[RDX_MINMAX]] ; entry: br label %for.body @@ -194,7 +174,7 @@ define i32 @reduction_umax_select_ops_flipped(ptr nocapture %A, ptr nocapture %B ; CHECK-LABEL: define i32 @reduction_umax_select_ops_flipped ; CHECK-SAME: (ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -206,16 +186,11 @@ define i32 @reduction_umax_select_ops_flipped(ptr nocapture %A, ptr nocapture %B ; CHECK-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret i32 [[RDX_MINMAX]] ; entry: br label %for.body @@ -241,7 +216,7 @@ define i32 @reduction_umax_intrinsic(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: define i32 @reduction_umax_intrinsic ; CHECK-SAME: (ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -252,17 +227,12 @@ define i32 @reduction_umax_intrinsic(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP1]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP1]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP1]]) +; CHECK-NEXT: ret i32 [[TMP3]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll index b1c5ccbead64e..f1bf0f081a1f9 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll @@ -6,7 +6,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-LABEL: @reduction_sum_single( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -61,14 +61,9 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP26]] ; entry: br label %.lr.ph @@ -92,7 +87,7 @@ entry: define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-LABEL: @reduction_sum( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -169,16 +164,11 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], ; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP48]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP48]] ; entry: br label %.lr.ph @@ -207,7 +197,7 @@ entry: define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-LABEL: @reduction_sum_const( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -263,16 +253,11 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP29]] ; entry: br label %.lr.ph @@ -297,7 +282,7 @@ entry: define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-LABEL: @reduction_prod( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -374,16 +359,11 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], ; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[PROD_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP48]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[PROD_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP48]] ; entry: br label %.lr.ph @@ -412,7 +392,7 @@ entry: define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-LABEL: @reduction_mix( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -487,16 +467,11 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], ; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP46]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP46]] ; entry: br label %.lr.ph @@ -525,7 +500,7 @@ entry: define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-LABEL: @reduction_mul( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -597,16 +572,11 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP45]] ; entry: br label %.lr.ph @@ -633,7 +603,7 @@ entry: define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_and( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -705,16 +675,11 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP45]] ; entry: br label %for.body @@ -741,7 +706,7 @@ for.end: ; preds = %for.body, %entry define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_or( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -811,16 +776,11 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP43]] ; entry: br label %for.body @@ -847,7 +807,7 @@ for.end: ; preds = %for.body, %entry define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_xor( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -917,16 +877,11 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP43]] ; entry: br label %for.body @@ -953,7 +908,7 @@ for.end: ; preds = %for.body, %entry define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_fadd( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -1023,16 +978,11 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret float [[TMP43]] ; entry: br label %for.body @@ -1059,7 +1009,7 @@ for.end: ; preds = %for.body, %entry define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_fmul( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -1131,16 +1081,11 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret float [[TMP45]] ; entry: br label %for.body @@ -1167,13 +1112,13 @@ for.end: ; preds = %for.body, %entry define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_min( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] @@ -1216,20 +1161,15 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP24]]) -; CHECK-NEXT: [[TMP26]] = call i32 @llvm.smin.i32(i32 [[TMP25]], i32 [[VEC_PHI]]) +; CHECK-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP25]], i32 [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret i32 [[RDX_MINMAX]] ; entry: br label %for.body @@ -1254,13 +1194,13 @@ for.end: ; preds = %for.body, %entry define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_max( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] @@ -1303,20 +1243,15 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP24]]) -; CHECK-NEXT: [[TMP26]] = call i32 @llvm.umax.i32(i32 [[TMP25]], i32 [[VEC_PHI]]) +; CHECK-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP25]], i32 [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret i32 [[RDX_MINMAX]] ; entry: br label %for.body @@ -1342,7 +1277,7 @@ for.end: ; preds = %for.body, %entry define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-LABEL: @reduction_conditional( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> , float [[S:%.*]], i64 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1351,43 +1286,28 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[PREDPHI3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; CHECK-NEXT: [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], -; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP8]], [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], -; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP8]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP5]], -; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP9]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i1> , <4 x i1> [[TMP11]] -; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP13]], <4 x float> , <4 x float> [[PREDPHI_V]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], +; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP5]], +; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> , <4 x i1> [[TMP9]] +; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP11]], <4 x float> , <4 x float> [[PREDPHI_V]] ; CHECK-NEXT: [[PREDPHI3]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[PREDPHI3]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] -; CHECK: if.then: -; CHECK-NEXT: br i1 poison, label [[IF_THEN8:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then8: -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: if.else: -; CHECK-NEXT: br i1 poison, label [[IF_THEN16:%.*]], label [[FOR_INC]] -; CHECK: if.then16: -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi float [ poison, [[FOR_INC]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[SUM_1_LCSSA]] +; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[PREDPHI3]]) +; CHECK-NEXT: ret float [[TMP13]] ; entry: br label %for.body @@ -1433,7 +1353,7 @@ for.end: define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-LABEL: @reduction_add_trunc( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -1490,19 +1410,14 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] +; CHECK: ._crit_edge: ; CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP30]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[TMP33:%.*]] = trunc <4 x i32> [[TMP32]] to <4 x i8> ; CHECK-NEXT: [[TMP34:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP33]]) -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP31:![0-9]+]] -; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i8 [ poison, [[DOTLR_PH]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i8 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i8 [[TMP34]] ; entry: br label %.lr.ph @@ -1529,7 +1444,7 @@ entry: define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-LABEL: @reduction_and_trunc( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -1585,19 +1500,14 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: middle.block: +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] +; CHECK: ._crit_edge: ; CHECK-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP29]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[TMP32:%.*]] = trunc <4 x i32> [[TMP31]] to <4 x i8> ; CHECK-NEXT: [[TMP33:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> [[TMP32]]) -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP33:![0-9]+]] -; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i8 [ poison, [[DOTLR_PH]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i8 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i8 [[TMP33]] ; entry: br label %.lr.ph diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll index 306ec125dc202..f22f19bb43027 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll @@ -6,7 +6,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-LABEL: @reduction_sum_single( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -35,17 +35,12 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] +; CHECK: ._crit_edge: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP7]], [[TMP5]] ; CHECK-NEXT: [[BIN_RDX7:%.*]] = add i32 [[TMP9]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX8:%.*]] = add i32 [[TMP11]], [[BIN_RDX7]] -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[BIN_RDX8]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[BIN_RDX8]] ; entry: br label %.lr.ph @@ -69,7 +64,7 @@ entry: define i32 @predicated(ptr noalias nocapture %A) { ; CHECK-LABEL: @predicated( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -260,19 +255,14 @@ define i32 @predicated(ptr noalias nocapture %A) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP111:%.*]] = icmp eq i64 [[INDEX_NEXT]], 272 -; CHECK-NEXT: br i1 [[TMP111]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP111]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] +; CHECK: ._crit_edge: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP104]], [[TMP101]] ; CHECK-NEXT: [[BIN_RDX37:%.*]] = add i32 [[TMP107]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX38:%.*]] = add i32 [[TMP110]], [[BIN_RDX37]] -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[BIN_RDX38]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[BIN_RDX38]] ; entry: br label %.lr.ph @@ -296,7 +286,7 @@ entry: define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) { ; CHECK-LABEL: @cond_rdx_pred( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], 15 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], -16 @@ -502,23 +492,14 @@ define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP119:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP119]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP119]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: +; CHECK-NEXT: br label [[FOR_END:%.*]] +; CHECK: for.end: ; CHECK-NEXT: [[BIN_RDX:%.*]] = mul i32 [[TMP112]], [[TMP109]] ; CHECK-NEXT: [[BIN_RDX39:%.*]] = mul i32 [[TMP115]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX40:%.*]] = mul i32 [[TMP118]], [[BIN_RDX39]] -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] -; CHECK: if.then: -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: for.end: -; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ poison, [[FOR_INC]] ], [ [[BIN_RDX40]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RES_LCSSA]] +; CHECK-NEXT: ret i32 [[BIN_RDX40]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index d85241167d0cd..f7eb0717018ef 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -6,7 +6,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-LABEL: @reduction_sum_single( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -20,14 +20,9 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP2]] ; entry: br label %.lr.ph @@ -51,7 +46,7 @@ entry: define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-LABEL: @reduction_sum( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -71,16 +66,11 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP7]] ; entry: br label %.lr.ph @@ -109,7 +99,7 @@ entry: define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-LABEL: @reduction_sum_const( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -122,16 +112,11 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP3]] = add i32 [[TMP2]], 12 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP3]] ; entry: br label %.lr.ph @@ -156,7 +141,7 @@ entry: define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-LABEL: @reduction_prod( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -176,16 +161,11 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[PROD_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[PROD_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP7]] ; entry: br label %.lr.ph @@ -214,7 +194,7 @@ entry: define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-LABEL: @reduction_mix( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -233,16 +213,11 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP6]] ; entry: br label %.lr.ph @@ -271,7 +246,7 @@ entry: define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-LABEL: @reduction_mul( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -287,16 +262,11 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP5]] = mul i32 [[TMP4]], [[TMP3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP5]] ; entry: br label %.lr.ph @@ -323,7 +293,7 @@ entry: define i32 @start_at_non_zero(ptr nocapture %in, ptr nocapture %coeff, ptr nocapture %out) { ; CHECK-LABEL: @start_at_non_zero( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -338,16 +308,11 @@ define i32 @start_at_non_zero(ptr nocapture %in, ptr nocapture %coeff, ptr nocap ; CHECK-NEXT: [[TMP4]] = add i32 [[TMP3]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP4]] ; entry: br label %for.body @@ -374,7 +339,7 @@ for.end: ; preds = %for.body, %entry define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_and( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -390,16 +355,11 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP5]] = and i32 [[TMP4]], [[TMP3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP5]] ; entry: br label %for.body @@ -426,7 +386,7 @@ for.end: ; preds = %for.body, %entry define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_or( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -441,16 +401,11 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP4]] = or i32 [[TMP3]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP4]] ; entry: br label %for.body @@ -477,7 +432,7 @@ for.end: ; preds = %for.body, %entry define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_xor( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -492,16 +447,11 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP4]] = xor i32 [[TMP3]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP4]] ; entry: br label %for.body @@ -528,7 +478,7 @@ for.end: ; preds = %for.body, %entry define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_fadd( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -542,16 +492,11 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP3]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP2]], <4 x float> [[WIDE_LOAD1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret float [[TMP3]] ; entry: br label %for.body @@ -578,7 +523,7 @@ for.end: ; preds = %for.body, %entry define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_fmul( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -594,16 +539,11 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP5]] = fmul fast float [[TMP4]], [[TMP3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret float [[TMP5]] ; entry: br label %for.body @@ -631,7 +571,7 @@ for.end: ; preds = %for.body, %entry define i32 @reduction_sub_lhs(ptr noalias nocapture %A) { ; CHECK-LABEL: @reduction_sub_lhs( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -642,17 +582,12 @@ define i32 @reduction_sub_lhs(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP1]] = sub <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[X_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[X_0_LCSSA]] +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) +; CHECK-NEXT: ret i32 [[TMP3]] ; entry: br label %for.body @@ -677,7 +612,7 @@ for.end: ; preds = %for.body, %entry define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-LABEL: @reduction_conditional( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> , float [[S:%.*]], i64 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -702,27 +637,12 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[PREDPHI3]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[PREDPHI3]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] -; CHECK: if.then: -; CHECK-NEXT: br i1 poison, label [[IF_THEN8:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then8: -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: if.else: -; CHECK-NEXT: br i1 poison, label [[IF_THEN16:%.*]], label [[FOR_INC]] -; CHECK: if.then16: -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi float [ poison, [[FOR_INC]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[SUM_1_LCSSA]] +; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[PREDPHI3]]) +; CHECK-NEXT: ret float [[TMP13]] ; entry: br label %for.body @@ -814,7 +734,7 @@ end: define i32 @reduction_predicated(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-LABEL: @reduction_predicated( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -834,16 +754,11 @@ define i32 @reduction_predicated(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP31:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP7]] ; entry: br label %.lr.ph @@ -872,7 +787,7 @@ entry: define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-LABEL: @reduction_add_trunc( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -884,17 +799,12 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP2]] = add <4 x i8> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP4:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP2]]) -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP33:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i8 [ poison, [[DOTLR_PH]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i8 [[SUM_0_LCSSA]] +; CHECK-NEXT: [[TMP4:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP2]]) +; CHECK-NEXT: ret i8 [[TMP4]] ; entry: br label %.lr.ph @@ -921,7 +831,7 @@ entry: define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-LABEL: @reduction_and_trunc( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -933,17 +843,12 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP2]] = and <4 x i8> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP4:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> [[TMP2]]) -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP35:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i8 [ poison, [[DOTLR_PH]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i8 [[SUM_0_LCSSA]] +; CHECK-NEXT: [[TMP4:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> [[TMP2]]) +; CHECK-NEXT: ret i8 [[TMP4]] ; entry: br label %.lr.ph @@ -987,7 +892,7 @@ define float @reduction_fmuladd(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP4]] = fadd float [[TMP3]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1005,7 +910,7 @@ define float @reduction_fmuladd(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP6]], float [[TMP7]], float [[SUM_07]]) ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[MULADD_LCSSA]] @@ -1130,7 +1035,7 @@ define i32 @predicated_not_dominates_reduction(ptr nocapture noundef readonly %h ; CHECK-NEXT: [[TMP7]] = add i32 [[TMP6]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[I]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END7:%.*]], label [[SCALAR_PH]] @@ -1157,7 +1062,7 @@ define i32 @predicated_not_dominates_reduction(ptr nocapture noundef readonly %h ; CHECK-NEXT: [[G_1]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[G_016]], [[FOR_BODY2]] ] ; CHECK-NEXT: [[INC6]] = add nuw nsw i32 [[A_117]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC6]], [[I]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END7]], label [[FOR_BODY2]], !llvm.loop [[LOOP39:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END7]], label [[FOR_BODY2]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: for.end7: ; CHECK-NEXT: [[G_1_LCSSA:%.*]] = phi i32 [ [[G_1]], [[FOR_INC5]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[G_1_LCSSA]] @@ -1219,7 +1124,7 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read ; CHECK-NEXT: [[TMP11]] = add i32 [[TMP10]], [[TMP8]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[I]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END7:%.*]], label [[SCALAR_PH]] @@ -1247,7 +1152,7 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read ; CHECK-NEXT: [[G_1]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[G_016]], [[FOR_BODY2]] ] ; CHECK-NEXT: [[INC6]] = add nuw nsw i32 [[A_117]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC6]], [[I]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END7]], label [[FOR_BODY2]], !llvm.loop [[LOOP41:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END7]], label [[FOR_BODY2]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: for.end7: ; CHECK-NEXT: [[G_1_LCSSA:%.*]] = phi i32 [ [[G_1]], [[FOR_INC5]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[G_1_LCSSA]] @@ -1287,7 +1192,7 @@ for.end7: ; preds = %for.inc5 define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-LABEL: @predicated_or_dominates_reduction( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -1358,27 +1263,16 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-NEXT: [[TMP46:%.*]] = xor <4 x i1> [[TMP19]], ; CHECK-NEXT: [[TMP47:%.*]] = or <4 x i1> [[TMP45]], [[TMP46]] ; CHECK-NEXT: [[TMP48:%.*]] = bitcast <4 x i1> [[TMP47]] to i4 -; CHECK-NEXT: [[TMP49:%.*]] = call i4 @llvm.ctpop.i4(i4 [[TMP48]]), !range [[RNG42:![0-9]+]] +; CHECK-NEXT: [[TMP49:%.*]] = call i4 @llvm.ctpop.i4(i4 [[TMP48]]), !range [[RNG25:![0-9]+]] ; CHECK-NEXT: [[TMP50:%.*]] = zext nneg i4 [[TMP49]] to i32 ; CHECK-NEXT: [[TMP51]] = add i32 [[VEC_PHI]], [[TMP50]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ poison, [[FOR_INC:%.*]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[A_1_LCSSA]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[LOR_LHS_FALSE:%.*]], label [[IF_THEN:%.*]] -; CHECK: lor.lhs.false: -; CHECK-NEXT: br i1 poison, label [[FOR_INC]], label [[IF_THEN]] -; CHECK: if.then: -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] +; CHECK-NEXT: ret i32 [[TMP51]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll index 7fd762c7b735a..2d1f9dfb939f9 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll @@ -6,7 +6,7 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-LABEL: @reduction_sum_single( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -60,15 +60,10 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]]) -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]]) +; CHECK-NEXT: ret i32 [[TMP27]] ; entry: br label %.lr.ph @@ -91,7 +86,7 @@ entry: define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-LABEL: @reduction_sum( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -161,17 +156,12 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP43]]) -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP43]]) +; CHECK-NEXT: ret i32 [[TMP45]] ; entry: br label %.lr.ph @@ -198,7 +188,7 @@ entry: define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-LABEL: @reduction_prod( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -267,17 +257,12 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP42]]) -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[PROD_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP44]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[PROD_0_LCSSA]] +; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP42]]) +; CHECK-NEXT: ret i32 [[TMP44]] ; entry: br label %.lr.ph @@ -303,7 +288,7 @@ entry: define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_and( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -372,17 +357,12 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP42]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP44]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP42]]) +; CHECK-NEXT: ret i32 [[TMP44]] ; entry: br label %for.body @@ -408,7 +388,7 @@ for.end: ; preds = %for.body, %entry define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_or( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -477,17 +457,12 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP42]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP44]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP42]]) +; CHECK-NEXT: ret i32 [[TMP44]] ; entry: br label %for.body @@ -513,7 +488,7 @@ for.end: ; preds = %for.body, %entry define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_xor( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -582,17 +557,12 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP42]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP44]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP42]]) +; CHECK-NEXT: ret i32 [[TMP44]] ; entry: br label %for.body @@ -618,7 +588,7 @@ for.end: ; preds = %for.body, %entry define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_fadd( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -687,17 +657,12 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP42]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP44]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] +; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP42]]) +; CHECK-NEXT: ret float [[TMP44]] ; entry: br label %for.body @@ -723,7 +688,7 @@ for.end: ; preds = %for.body, %entry define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_fmul( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -792,17 +757,12 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP42]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP44]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] +; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP42]]) +; CHECK-NEXT: ret float [[TMP44]] ; entry: br label %for.body @@ -828,7 +788,7 @@ for.end: ; preds = %for.body, %entry define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_min( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -880,17 +840,12 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP25]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP25]]) +; CHECK-NEXT: ret i32 [[TMP27]] ; entry: br label %for.body @@ -914,7 +869,7 @@ for.end: ; preds = %for.body, %entry define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @reduction_max( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -966,17 +921,12 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP25]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP25]]) +; CHECK-NEXT: ret i32 [[TMP27]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll index ba82bac6fad26..0cd8730f7cd3d 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction.ll @@ -748,7 +748,7 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-LABEL: define float @reduction_conditional( ; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], float [[S:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> , float [[S]], i64 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -775,25 +775,10 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[PREDPHI3]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] -; CHECK: if.then: -; CHECK-NEXT: br i1 poison, label [[IF_THEN8:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then8: -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: if.else: -; CHECK-NEXT: br i1 poison, label [[IF_THEN16:%.*]], label [[FOR_INC]] -; CHECK: if.then16: -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi float [ poison, [[FOR_INC]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[SUM_1_LCSSA]] +; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[PREDPHI3]]) +; CHECK-NEXT: ret float [[TMP13]] ; entry: br label %for.body @@ -1061,7 +1046,7 @@ define i32 @reduction_sum_multiuse(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]] @@ -1084,7 +1069,7 @@ define i32 @reduction_sum_multiuse(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: ._crit_edge: ; CHECK-NEXT: [[SUM_COPY:%.*]] = phi i32 [ [[TMP17]], [[DOTLR_PH]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[TMP18:%.*]] = shl i32 [[SUM_COPY]], 1 @@ -1219,7 +1204,7 @@ define i64 @reduction_with_phi_with_one_incoming_on_backedge(i16 %n, ptr %A) { ; CHECK-NEXT: [[TMP4]] = add <4 x i64> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[TMP1]] @@ -1241,7 +1226,7 @@ define i64 @reduction_with_phi_with_one_incoming_on_backedge(i16 %n, ptr %A) { ; CHECK: loop.latch: ; CHECK-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i16 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[LCSSA_EXIT:%.*]] = phi i64 [ [[SUM_NEXT]], [[LOOP_LATCH]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[LCSSA_EXIT]] @@ -1297,7 +1282,7 @@ define i64 @reduction_with_phi_with_two_incoming_on_backedge(i16 %n, ptr %A) { ; CHECK-NEXT: [[TMP4]] = add <4 x i64> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[TMP1]] @@ -1320,7 +1305,7 @@ define i64 @reduction_with_phi_with_two_incoming_on_backedge(i16 %n, ptr %A) { ; CHECK: loop.latch: ; CHECK-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i16 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[LCSSA_EXIT:%.*]] = phi i64 [ [[SUM_NEXT]], [[LOOP_LATCH]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[LCSSA_EXIT]] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll index d5df8afc80a79..8ef25025a75aa 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll @@ -304,7 +304,7 @@ loopexit: define dso_local void @forced_optsize(ptr noalias nocapture readonly %x_p, ptr noalias nocapture readonly %y_p, ptr noalias nocapture %z_p) minsize optsize { ; CHECK-LABEL: @forced_optsize( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -320,13 +320,9 @@ define dso_local void @forced_optsize(ptr noalias nocapture readonly %x_p, ptr n ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; ; FORCED_OPTSIZE-LABEL: @forced_optsize( ; FORCED_OPTSIZE-NEXT: entry: diff --git a/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll b/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll index 521af746dffce..e21628179f84e 100644 --- a/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll +++ b/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll @@ -9,7 +9,7 @@ define void @test_pr63368(i1 %c, ptr %A) { ; CHECK-LABEL: define void @test_pr63368 ; CHECK-SAME: (i1 [[C:%.*]], ptr [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -19,27 +19,15 @@ define void @test_pr63368(i1 %c, ptr %A) { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT_1:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP_1_HEADER:%.*]] -; CHECK: loop.1.header: -; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP_1_LATCH:%.*]] ] -; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[A]], align 4 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_1_LATCH]], label [[LOOP_1_LATCH]] -; CHECK: loop.1.latch: -; CHECK-NEXT: [[L_LCSSA:%.*]] = phi i32 [ [[L]], [[LOOP_1_HEADER]] ], [ [[L]], [[LOOP_1_HEADER]] ] -; CHECK-NEXT: [[IV_1_NEXT]] = add nuw nsw i32 [[IV_1]], 1 -; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i32 [[IV_1_NEXT]], 100 -; CHECK-NEXT: br i1 [[EC_1]], label [[EXIT_1]], label [[LOOP_1_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP0]], [[VECTOR_BODY]] ] +; CHECK-NEXT: br label [[EXIT_1:%.*]] ; CHECK: exit.1: -; CHECK-NEXT: [[L_LCSSA_LCSSA:%.*]] = phi i32 [ [[L_LCSSA]], [[LOOP_1_LATCH]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[SMAX1:%.*]] = call i32 @llvm.smax.i32(i32 [[L_LCSSA_LCSSA]], i32 -1) +; CHECK-NEXT: [[SMAX1:%.*]] = call i32 @llvm.smax.i32(i32 [[DOTLCSSA]], i32 -1) ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SMAX1]], 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH3:%.*]], label [[VECTOR_SCEVCHECK:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: -; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[L_LCSSA_LCSSA]], i32 -1) +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[DOTLCSSA]], i32 -1) ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SMAX]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8 ; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i8 [[TMP4]], 0 @@ -51,37 +39,37 @@ define void @test_pr63368(i1 %c, ptr %A) { ; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i32 [[TMP3]], 255 ; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP7]], [[TMP12]] -; CHECK-NEXT: br i1 [[TMP13]], label [[SCALAR_PH3]], label [[VECTOR_PH4:%.*]] -; CHECK: vector.ph4: +; CHECK-NEXT: br i1 [[TMP13]], label [[SCALAR_PH]], label [[VECTOR_PH3:%.*]] +; CHECK: vector.ph3: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = trunc i32 [[N_VEC]] to i8 -; CHECK-NEXT: br label [[VECTOR_BODY7:%.*]] -; CHECK: vector.body6: -; CHECK-NEXT: [[INDEX8:%.*]] = phi i32 [ 0, [[VECTOR_PH4]] ], [ [[INDEX_NEXT9:%.*]], [[VECTOR_BODY7]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX8]] to i8 +; CHECK-NEXT: br label [[VECTOR_BODY4:%.*]] +; CHECK: vector.body4: +; CHECK-NEXT: [[INDEX5:%.*]] = phi i32 [ 0, [[VECTOR_PH3]] ], [ [[INDEX_NEXT6:%.*]], [[VECTOR_BODY4]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX5]] to i8 ; CHECK-NEXT: [[TMP14:%.*]] = add i8 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP15:%.*]] = add i8 [[TMP14]], 1 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i8 [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP16]], i32 0 ; CHECK-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP17]], align 1 -; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i32 [[INDEX8]], 4 -; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT9]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK2:%.*]], label [[VECTOR_BODY7]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i32 [[INDEX5]], 4 +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT6]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK2:%.*]], label [[VECTOR_BODY4]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block2: -; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT_2:%.*]], label [[SCALAR_PH3]] -; CHECK: scalar.ph3: -; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK2]] ], [ 0, [[EXIT_1]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_2:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK2]] ], [ 0, [[EXIT_1]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label [[LOOP_2:%.*]] ; CHECK: loop.2: -; CHECK-NEXT: [[IV_2:%.*]] = phi i8 [ [[BC_RESUME_VAL5]], [[SCALAR_PH3]] ], [ [[IV_2_NEXT:%.*]], [[LOOP_2]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP_2]] ] ; CHECK-NEXT: [[IV_2_NEXT]] = add i8 [[IV_2]], 1 ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i8 [[IV_2_NEXT]] ; CHECK-NEXT: store i8 0, ptr [[GEP_A]], align 1 ; CHECK-NEXT: [[IV_2_SEXT:%.*]] = sext i8 [[IV_2]] to i32 -; CHECK-NEXT: [[EC_2:%.*]] = icmp sge i32 [[L_LCSSA_LCSSA]], [[IV_2_SEXT]] -; CHECK-NEXT: br i1 [[EC_2]], label [[LOOP_2]], label [[EXIT_2]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[EC_2:%.*]] = icmp sge i32 [[DOTLCSSA]], [[IV_2_SEXT]] +; CHECK-NEXT: br i1 [[EC_2]], label [[LOOP_2]], label [[EXIT_2]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: exit.2: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll b/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll index 16ab45415b5cc..5cbe75b216b0d 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll @@ -6,7 +6,7 @@ define i64 @pr62565_incoming_value_known_undef(i64 %a, ptr %src) { ; CHECK-LABEL: define i64 @pr62565_incoming_value_known_undef ; CHECK-SAME: (i64 [[A:%.*]], ptr [[SRC:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer @@ -28,24 +28,9 @@ define i64 @pr62565_incoming_value_known_undef(i64 %a, ptr %src) { ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <2 x i64> [[TMP4]], undef ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[RDX_SELECT_CMP]]) ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i64 [[A]], i64 undef -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ undef, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 1 -; CHECK-NEXT: [[SELECT]] = select i1 [[C]], i64 [[RED]], i64 [[A]] -; CHECK-NEXT: [[ADD]] = add nuw i32 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 32 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[SELECT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[PHI]] +; CHECK-NEXT: ret i64 [[RDX_SELECT]] ; entry: br label %loop @@ -70,7 +55,7 @@ define i64 @pr62565_incoming_value_known_poison(i64 %a, ptr %src) { ; CHECK-LABEL: define i64 @pr62565_incoming_value_known_poison ; CHECK-SAME: (i64 [[A:%.*]], ptr [[SRC:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer @@ -87,29 +72,14 @@ define i64 @pr62565_incoming_value_known_poison(i64 %a, ptr %src) { ; CHECK-NEXT: [[TMP4]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_PHI]], <2 x i64> [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <2 x i64> [[TMP4]], poison ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[RDX_SELECT_CMP]]) ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i64 [[A]], i64 poison -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ poison, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 1 -; CHECK-NEXT: [[SELECT]] = select i1 [[C]], i64 [[RED]], i64 [[A]] -; CHECK-NEXT: [[ADD]] = add nuw i32 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 32 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[SELECT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[PHI]] +; CHECK-NEXT: ret i64 [[RDX_SELECT]] ; entry: br label %loop @@ -134,7 +104,7 @@ define i64 @pr62565_incoming_value_may_be_poison(i64 %a, ptr %src, i64 %start) { ; CHECK-LABEL: define i64 @pr62565_incoming_value_may_be_poison ; CHECK-SAME: (i64 [[A:%.*]], ptr [[SRC:%.*]], i64 [[START:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0 ; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <2 x i64> [[MINMAX_IDENT_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer @@ -153,31 +123,16 @@ define i64 @pr62565_incoming_value_may_be_poison(i64 %a, ptr %src, i64 %start) { ; CHECK-NEXT: [[TMP4]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_PHI]], <2 x i64> [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <2 x i64> [[TMP4]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[RDX_SELECT_CMP]]) ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i64 [[A]], i64 [[START]] -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[START]], [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 1 -; CHECK-NEXT: [[SELECT]] = select i1 [[C]], i64 [[RED]], i64 [[A]] -; CHECK-NEXT: [[ADD]] = add nuw i32 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 32 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[SELECT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[PHI]] +; CHECK-NEXT: ret i64 [[RDX_SELECT]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll index a3ab8dc4016f4..fafd019520a7a 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll @@ -12,7 +12,7 @@ define i32 @test(i64 %N, i32 %x) { ; CHECK-NEXT: [[EXTRA_ITER_CHECK:%.*]] = icmp eq i64 [[EXTRA_ITER]], 0 ; CHECK-NEXT: br i1 [[EXTRA_ITER_CHECK]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[EXTRA_ITER]], 3 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 @@ -37,24 +37,11 @@ define i32 @test(i64 %N, i32 %x) { ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP3]]) -; CHECK-NEXT: br i1 true, label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[EXTRA_ITER]], [[LOOP_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[NEXT:%.*]] = phi i32 [ [[SEL:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SEL_COND:%.*]] = icmp sgt i32 [[NEXT]], 10 -; CHECK-NEXT: [[SEL]] = select i1 [[SEL_COND]], i32 [[NEXT]], i32 10 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 0 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: -; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], [[LOOP]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ 0, [[CHECK]] ], [ [[SEL_LCSSA]], [[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ 0, [[CHECK]] ], [ [[TMP5]], [[EXIT_LOOPEXIT]] ] ; CHECK-NEXT: ret i32 [[RESULT]] ; entry: @@ -82,7 +69,7 @@ exit: define i32 @pr66895_tail_fold_reduction_exit_inst_gets_simplified(i32 %n) { ; CHECK-LABEL: @pr66895_tail_fold_reduction_exit_inst_gets_simplified( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -95,24 +82,12 @@ define i32 @pr66895_tail_fold_reduction_exit_inst_gets_simplified(i32 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_PHI]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1]]) -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -4, [[MIDDLE_BLOCK]] ], [ 12, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1 -; CHECK-NEXT: [[RED_NEXT]] = mul i32 [[RED]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 0 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[RED_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], [[LOOP]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RED_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP3]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll index 4087f9b140e38..5a259af8e52a7 100644 --- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll @@ -9,7 +9,7 @@ define void @single_incoming_phi_no_blend_mask(i64 %a, i64 %b) { ; CHECK-LABEL: @single_incoming_phi_no_blend_mask( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer @@ -34,29 +34,7 @@ define void @single_incoming_phi_no_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16 -; CHECK-NEXT: br label [[LOOP_COND:%.*]] -; CHECK: loop.cond: -; CHECK-NEXT: [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ] -; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[BLEND]] -; CHECK-NEXT: [[LV:%.*]] = load i16, ptr [[SRC_PTR]], align 1 -; CHECK-NEXT: [[CMP_B:%.*]] = icmp sgt i64 [[IV]], [[A]] -; CHECK-NEXT: br i1 [[CMP_B]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]] -; CHECK: loop.next: -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[RES:%.*]] = phi i16 [ [[LV]], [[LOOP_COND]] ], [ 1, [[LOOP_NEXT]] ] -; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[IV]] -; CHECK-NEXT: store i16 [[RES]], ptr [[DST_PTR]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 31 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -95,7 +73,7 @@ exit: define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) { ; CHECK-LABEL: @single_incoming_phi_with_blend_mask( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer @@ -123,32 +101,9 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16 -; CHECK-NEXT: [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]] -; CHECK-NEXT: br i1 [[CMP_A]], label [[LOOP_COND:%.*]], label [[LOOP_LATCH]] -; CHECK: loop.cond: -; CHECK-NEXT: [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ] -; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[BLEND]] -; CHECK-NEXT: [[LV:%.*]] = load i16, ptr [[SRC_PTR]], align 1 -; CHECK-NEXT: [[CMP_B:%.*]] = icmp sgt i64 [[IV]], [[A]] -; CHECK-NEXT: br i1 [[CMP_B]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]] -; CHECK: loop.next: -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[RES:%.*]] = phi i16 [ 0, [[LOOP_HEADER]] ], [ [[LV]], [[LOOP_COND]] ], [ 1, [[LOOP_NEXT]] ] -; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[IV]] -; CHECK-NEXT: store i16 [[RES]], ptr [[DST_PTR]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 31 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -186,7 +141,7 @@ exit: define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) { ; CHECK-LABEL: @multiple_incoming_phi_with_blend_mask( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer @@ -216,29 +171,9 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) { ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], ; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i16> [[VEC_IND3]], ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16 -; CHECK-NEXT: [[IV_TRUNC_2:%.*]] = trunc i64 [[IV]] to i16 -; CHECK-NEXT: [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]] -; CHECK-NEXT: br i1 [[CMP_A]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]] -; CHECK: loop.next: -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ], [ [[IV_TRUNC_2]], [[LOOP_NEXT]] ] -; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[BLEND]] -; CHECK-NEXT: [[LV:%.*]] = load i16, ptr [[SRC_PTR]], align 1 -; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store i16 [[LV]], ptr [[DST_PTR]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 31 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -274,7 +209,7 @@ exit: define void @single_incoming_needs_predication(i64 %a, i64 %b) { ; CHECK-LABEL: @single_incoming_needs_predication( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer @@ -318,32 +253,9 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 -; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16 -; CHECK-NEXT: [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]] -; CHECK-NEXT: br i1 [[CMP_A]], label [[LOOP_COND:%.*]], label [[LOOP_LATCH]] -; CHECK: loop.cond: -; CHECK-NEXT: [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ] -; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[BLEND]] -; CHECK-NEXT: [[LV:%.*]] = load i16, ptr [[SRC_PTR]], align 1 -; CHECK-NEXT: [[CMP_B:%.*]] = icmp sgt i64 [[IV]], [[A]] -; CHECK-NEXT: br i1 [[CMP_B]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]] -; CHECK: loop.next: -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[RES:%.*]] = phi i16 [ 0, [[LOOP_HEADER]] ], [ [[LV]], [[LOOP_COND]] ], [ 1, [[LOOP_NEXT]] ] -; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[IV]] -; CHECK-NEXT: store i16 [[RES]], ptr [[DST_PTR]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 63 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -382,7 +294,7 @@ exit: define void @duplicated_incoming_blocks_blend(i32 %x, ptr %ptr) { ; CHECK-LABEL: @duplicated_incoming_blocks_blend( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -395,23 +307,9 @@ define void @duplicated_incoming_blocks_blend(i32 %x, ptr %ptr) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_I:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[C_0:%.*]] = icmp ugt i32 [[IV]], [[X:%.*]] -; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[IV]], [[LOOP_HEADER]] ], [ [[IV]], [[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_PTR:%.*]] = getelementptr i32, ptr [[PTR]], i32 [[P]] -; CHECK-NEXT: store i32 [[P]], ptr [[GEP_PTR]], align 4 -; CHECK-NEXT: [[ADD_I]] = add nsw i32 [[P]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD_I]], 1000 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/trunc-reductions.ll b/llvm/test/Transforms/LoopVectorize/trunc-reductions.ll index 2278821c5b2f9..d90c49e5708f4 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-reductions.ll @@ -4,29 +4,24 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %ptr) { ; CHECK-LABEL: @reduction_and_trunc( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i8> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i8> [ , [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 -; CHECK-NEXT: [[TMP3]] = and <8 x i8> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP2]] = and <8 x i8> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> [[TMP3]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[AND_LCSSA_OFF0:%.*]] = phi i8 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i8 [[AND_LCSSA_OFF0]] +; CHECK-NEXT: [[TMP4:%.*]] = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> [[TMP2]]) +; CHECK-NEXT: ret i8 [[TMP4]] ; entry: br label %for.body @@ -51,29 +46,24 @@ for.end: define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) { ; CHECK-LABEL: @reduction_or_trunc( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2 -; CHECK-NEXT: [[TMP3]] = or <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP2]] = or <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP3]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[XOR_LCSSA_OFF0:%.*]] = phi i16 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i16 [[XOR_LCSSA_OFF0]] +; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP2]]) +; CHECK-NEXT: ret i16 [[TMP4]] ; entry: br label %for.body @@ -98,29 +88,24 @@ for.end: define i16 @reduction_xor_trunc(ptr noalias nocapture %ptr) { ; CHECK-LABEL: @reduction_xor_trunc( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2 -; CHECK-NEXT: [[TMP3]] = xor <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP2]] = xor <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> [[TMP3]]) -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[XOR_LCSSA_OFF0:%.*]] = phi i16 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i16 [[XOR_LCSSA_OFF0]] +; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> [[TMP2]]) +; CHECK-NEXT: ret i16 [[TMP4]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll b/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll index edf719564e6ee..515ff0d9e8856 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll @@ -4,10 +4,10 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define void @test_pr47927_lshr_const_shift_ops(ptr %dst, i32 %f) { -; CHECK-LABEL: define void @test_pr47927_lshr_const_shift_ops -; CHECK-SAME: (ptr [[DST:%.*]], i32 [[F:%.*]]) { +; CHECK-LABEL: define void @test_pr47927_lshr_const_shift_ops( +; CHECK-SAME: ptr [[DST:%.*]], i32 [[F:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -26,21 +26,7 @@ define void @test_pr47927_lshr_const_shift_ops(ptr %dst, i32 %f) { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[L:%.*]] = lshr i32 [[F]], 18 -; CHECK-NEXT: [[L_T:%.*]] = trunc i32 [[L]] to i8 -; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV_EXT]] -; CHECK-NEXT: store i8 [[L_T]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IV_NEXT]] to i32 -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[CONV]], 100 -; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -64,10 +50,10 @@ exit: } define void @test_shl_const_shift_ops(ptr %dst, i32 %f) { -; CHECK-LABEL: define void @test_shl_const_shift_ops -; CHECK-SAME: (ptr [[DST:%.*]], i32 [[F:%.*]]) { +; CHECK-LABEL: define void @test_shl_const_shift_ops( +; CHECK-SAME: ptr [[DST:%.*]], i32 [[F:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -84,23 +70,9 @@ define void @test_shl_const_shift_ops(ptr %dst, i32 %f) { ; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[TMP5]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[L:%.*]] = shl i32 [[F]], 18 -; CHECK-NEXT: [[L_T:%.*]] = trunc i32 [[L]] to i8 -; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV_EXT]] -; CHECK-NEXT: store i8 [[L_T]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IV_NEXT]] to i32 -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[CONV]], 100 -; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -124,10 +96,10 @@ exit: } define void @test_ashr_const_shift_ops(ptr %dst, i32 %f) { -; CHECK-LABEL: define void @test_ashr_const_shift_ops -; CHECK-SAME: (ptr [[DST:%.*]], i32 [[F:%.*]]) { +; CHECK-LABEL: define void @test_ashr_const_shift_ops( +; CHECK-SAME: ptr [[DST:%.*]], i32 [[F:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -144,23 +116,9 @@ define void @test_ashr_const_shift_ops(ptr %dst, i32 %f) { ; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[TMP5]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[L:%.*]] = ashr i32 [[F]], 18 -; CHECK-NEXT: [[L_T:%.*]] = trunc i32 [[L]] to i8 -; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV_EXT]] -; CHECK-NEXT: store i8 [[L_T]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IV_NEXT]] to i32 -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[CONV]], 100 -; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -184,10 +142,10 @@ exit: } define void @test_shl_const_shifted_op(ptr %dst, i32 %f) { -; CHECK-LABEL: define void @test_shl_const_shifted_op -; CHECK-SAME: (ptr [[DST:%.*]], i32 [[F:%.*]]) { +; CHECK-LABEL: define void @test_shl_const_shifted_op( +; CHECK-SAME: ptr [[DST:%.*]], i32 [[F:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -204,25 +162,9 @@ define void @test_shl_const_shifted_op(ptr %dst, i32 %f) { ; CHECK-NEXT: store <4 x i8> [[TMP6]], ptr [[TMP3]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV_EXT]] -; CHECK-NEXT: [[LV:%.*]] = load i8, ptr [[GEP]], align 1 -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[LV]] to i32 -; CHECK-NEXT: [[L:%.*]] = shl i32 19, [[ZEXT]] -; CHECK-NEXT: [[L_T:%.*]] = trunc i32 [[L]] to i8 -; CHECK-NEXT: store i8 [[L_T]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IV_NEXT]] to i32 -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[CONV]], 100 -; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -249,10 +191,10 @@ exit: define void @test_lshr_by_18(ptr %A) { -; CHECK-LABEL: define void @test_lshr_by_18 -; CHECK-SAME: (ptr [[A:%.*]]) { +; CHECK-LABEL: define void @test_lshr_by_18( +; CHECK-SAME: ptr [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -269,25 +211,9 @@ define void @test_lshr_by_18(ptr %A) { ; CHECK-NEXT: store <4 x i8> [[TMP6]], ptr [[TMP3]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_EXT]] -; CHECK-NEXT: [[LV:%.*]] = load i8, ptr [[GEP]], align 1 -; CHECK-NEXT: [[LV_EXT:%.*]] = zext i8 [[LV]] to i32 -; CHECK-NEXT: [[L:%.*]] = lshr i32 [[LV_EXT]], 18 -; CHECK-NEXT: [[L_T:%.*]] = trunc i32 [[L]] to i8 -; CHECK-NEXT: store i8 [[L_T]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IV_NEXT]] to i32 -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[CONV]], 100 -; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -313,10 +239,10 @@ exit: } define void @test_lshr_by_4(ptr %A) { -; CHECK-LABEL: define void @test_lshr_by_4 -; CHECK-SAME: (ptr [[A:%.*]]) { +; CHECK-LABEL: define void @test_lshr_by_4( +; CHECK-SAME: ptr [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -332,26 +258,10 @@ define void @test_lshr_by_4(ptr %A) { ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i16> [[TMP5]] to <4 x i8> ; CHECK-NEXT: store <4 x i8> [[TMP6]], ptr [[TMP3]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_EXT]] -; CHECK-NEXT: [[LV:%.*]] = load i8, ptr [[GEP]], align 1 -; CHECK-NEXT: [[LV_EXT:%.*]] = zext i8 [[LV]] to i32 -; CHECK-NEXT: [[L:%.*]] = lshr i32 [[LV_EXT]], 4 -; CHECK-NEXT: [[L_T:%.*]] = trunc i32 [[L]] to i8 -; CHECK-NEXT: store i8 [[L_T]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IV_NEXT]] to i32 -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[CONV]], 100 -; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -375,19 +285,3 @@ loop: exit: ret void } -;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} -; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} -; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} -; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} -; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} -; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} -; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} -; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} -; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} -;. diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll index 4cee3e3cb6832..88a08c0f9870e 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll @@ -8,7 +8,7 @@ define void @ld_div1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-LABEL: define void @ld_div1_step1_start0_ind1 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -26,21 +26,7 @@ define void @ld_div1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[IV]], 1 -; CHECK-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[DIV]] -; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; CHECK-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; CHECK-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -67,7 +53,7 @@ define void @ld_div2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-LABEL: define void @ld_div2_step1_start0_ind1 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -84,23 +70,9 @@ define void @ld_div2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[IV]], 2 -; CHECK-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[DIV]] -; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; CHECK-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; CHECK-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -126,7 +98,7 @@ define void @ld_div3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-LABEL: define void @ld_div3_step1_start0_ind1 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -149,23 +121,9 @@ define void @ld_div3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[IV]], 3 -; CHECK-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[DIV]] -; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; CHECK-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; CHECK-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -191,7 +149,7 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-LABEL: define void @ld_div1_step2_start0_ind1 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -219,23 +177,9 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[IV]], 1 -; CHECK-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[DIV]] -; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; CHECK-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; CHECK-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -261,7 +205,7 @@ define void @ld_div2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-LABEL: define void @ld_div2_step2_start0_ind1 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -282,23 +226,9 @@ define void @ld_div2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP7]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[IV]], 2 -; CHECK-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[DIV]] -; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; CHECK-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; CHECK-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -324,7 +254,7 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-LABEL: define void @ld_div3_step2_start0_ind1 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -352,23 +282,9 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[IV]], 3 -; CHECK-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[DIV]] -; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; CHECK-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; CHECK-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -422,7 +338,7 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -438,7 +354,7 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -492,7 +408,7 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -508,7 +424,7 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -555,7 +471,7 @@ define void @ld_div3_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP7]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -571,7 +487,7 @@ define void @ld_div3_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -614,7 +530,7 @@ define void @ld_div1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -630,7 +546,7 @@ define void @ld_div1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -680,7 +596,7 @@ define void @ld_div2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -696,7 +612,7 @@ define void @ld_div2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -746,7 +662,7 @@ define void @ld_div3_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -762,7 +678,7 @@ define void @ld_div3_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -817,7 +733,7 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -833,7 +749,7 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -881,7 +797,7 @@ define void @ld_div2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -897,7 +813,7 @@ define void @ld_div2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -952,7 +868,7 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -968,7 +884,7 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -1023,7 +939,7 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -1039,7 +955,7 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -1094,7 +1010,7 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -1110,7 +1026,7 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP35:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -1158,7 +1074,7 @@ define void @ld_div3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -1174,7 +1090,7 @@ define void @ld_div3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP37:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -1199,7 +1115,7 @@ define void @test_step_is_not_invariant(ptr %A) { ; CHECK-LABEL: define void @test_step_is_not_invariant ; CHECK-SAME: (ptr [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -1221,24 +1137,9 @@ define void @test_step_is_not_invariant(ptr %A) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 56 -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 56, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[IV]], [[IV]] -; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; CHECK-NEXT: [[DIV5:%.*]] = udiv i16 [[DIV_LHS_TRUNC]], 6 -; CHECK-NEXT: [[CONV:%.*]] = trunc i32 [[IV]] to i16 -; CHECK-NEXT: [[IDXPROM:%.*]] = zext i16 [[DIV5]] to i64 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[IDXPROM]] -; CHECK-NEXT: store i16 [[CONV]], ptr [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 56 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP39:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll index 7f8b33e97360c..ffc948461aafc 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll @@ -8,7 +8,7 @@ define void @ld_and_neg1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-LABEL: define void @ld_and_neg1_step1_start0_ind1 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -26,21 +26,7 @@ define void @ld_and_neg1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], -1 -; CHECK-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AND]] -; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; CHECK-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; CHECK-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -67,7 +53,7 @@ define void @ld_and_neg2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-LABEL: define void @ld_and_neg2_step1_start0_ind1 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -84,23 +70,9 @@ define void @ld_and_neg2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], -2 -; CHECK-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AND]] -; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; CHECK-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; CHECK-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -126,7 +98,7 @@ define void @ld_and_neg3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-LABEL: define void @ld_and_neg3_step1_start0_ind1 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -149,23 +121,9 @@ define void @ld_and_neg3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], -3 -; CHECK-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AND]] -; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; CHECK-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; CHECK-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -191,7 +149,7 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-LABEL: define void @ld_and_neg1_step2_start0_ind1 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -219,23 +177,9 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], -1 -; CHECK-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AND]] -; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; CHECK-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; CHECK-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -261,7 +205,7 @@ define void @ld_and_neg2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-LABEL: define void @ld_and_neg2_step2_start0_ind1 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -283,23 +227,9 @@ define void @ld_and_neg2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP7]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], 1 -; CHECK-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AND]] -; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; CHECK-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; CHECK-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -353,7 +283,7 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -369,7 +299,7 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -423,7 +353,7 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -439,7 +369,7 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -489,7 +419,7 @@ define void @ld_and_neg2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -505,7 +435,7 @@ define void @ld_and_neg2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -560,7 +490,7 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -576,7 +506,7 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -631,7 +561,7 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -647,7 +577,7 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -702,7 +632,7 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -718,7 +648,7 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll index 098e29eb69166..11dadc0238efe 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll @@ -9,7 +9,7 @@ define void @ld_div2_urem3_1(ptr noalias %A, ptr noalias %B) { ; CHECK-LABEL: define void @ld_div2_urem3_1 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -59,22 +59,7 @@ define void @ld_div2_urem3_1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[IV]], 2 -; CHECK-NEXT: [[REM:%.*]] = urem i64 [[DIV]], 3 -; CHECK-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[REM]] -; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; CHECK-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; CHECK-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -101,7 +86,7 @@ define void @ld_div2_urem3_2(ptr noalias %A, ptr noalias %B) { ; CHECK-LABEL: define void @ld_div2_urem3_2 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -150,24 +135,9 @@ define void @ld_div2_urem3_2(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP39]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP39]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[IV_NEXT]], 2 -; CHECK-NEXT: [[REM:%.*]] = urem i64 [[DIV]], 3 -; CHECK-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[REM]] -; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; CHECK-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; CHECK-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -194,7 +164,7 @@ define void @ld_div4(ptr noalias %A, ptr noalias %B) { ; CHECK-LABEL: define void @ld_div4 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -241,23 +211,9 @@ define void @ld_div4(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[IV]], 4 -; CHECK-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[DIV]] -; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; CHECK-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; CHECK-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -284,7 +240,7 @@ define void @ld_div8_urem3(ptr noalias %A, ptr noalias %B) { ; CHECK-LABEL: define void @ld_div8_urem3 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -302,24 +258,9 @@ define void @ld_div8_urem3(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store <8 x i64> [[TMP5]], ptr [[TMP7]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[IV]], 8 -; CHECK-NEXT: [[REM:%.*]] = urem i64 [[DIV]], 3 -; CHECK-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[REM]] -; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; CHECK-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; CHECK-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll index f79772915b024..67c89af371044 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll @@ -9,7 +9,7 @@ define void @ld_lshr0_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-LABEL: define void @ld_lshr0_step1_start0_ind1 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF2-NEXT: entry: -; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF2-NEXT: br label [[VECTOR_PH:%.*]] ; VF2: vector.ph: ; VF2-NEXT: br label [[VECTOR_BODY:%.*]] ; VF2: vector.body: @@ -27,28 +27,14 @@ define void @ld_lshr0_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF2-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF2: middle.block: -; VF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF2: scalar.ph: -; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF2-NEXT: br label [[LOOP:%.*]] -; VF2: loop: -; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF2-NEXT: [[LSHR:%.*]] = lshr i64 [[IV]], 0 -; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[LSHR]] -; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; VF2-NEXT: br label [[EXIT:%.*]] ; VF2: exit: ; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_lshr0_step1_start0_ind1 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF4-NEXT: entry: -; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF4-NEXT: br label [[VECTOR_PH:%.*]] ; VF4: vector.ph: ; VF4-NEXT: br label [[VECTOR_BODY:%.*]] ; VF4: vector.body: @@ -66,21 +52,7 @@ define void @ld_lshr0_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF4: middle.block: -; VF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF4: scalar.ph: -; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF4-NEXT: br label [[LOOP:%.*]] -; VF4: loop: -; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF4-NEXT: [[LSHR:%.*]] = lshr i64 [[IV]], 0 -; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[LSHR]] -; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; VF4-NEXT: br label [[EXIT:%.*]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -107,7 +79,7 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-LABEL: define void @ld_lshr1_step1_start0_ind1 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF2-NEXT: entry: -; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF2-NEXT: br label [[VECTOR_PH:%.*]] ; VF2: vector.ph: ; VF2-NEXT: br label [[VECTOR_BODY:%.*]] ; VF2: vector.body: @@ -124,30 +96,16 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; VF2-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF2-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF2: middle.block: -; VF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF2: scalar.ph: -; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF2-NEXT: br label [[LOOP:%.*]] -; VF2: loop: -; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF2-NEXT: [[LSHR:%.*]] = lshr i64 [[IV]], 1 -; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[LSHR]] -; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; VF2-NEXT: br label [[EXIT:%.*]] ; VF2: exit: ; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_lshr1_step1_start0_ind1 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF4-NEXT: entry: -; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF4-NEXT: br label [[VECTOR_PH:%.*]] ; VF4: vector.ph: ; VF4-NEXT: br label [[VECTOR_BODY:%.*]] ; VF4: vector.body: @@ -178,23 +136,9 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF4: middle.block: -; VF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF4: scalar.ph: -; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF4-NEXT: br label [[LOOP:%.*]] -; VF4: loop: -; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF4-NEXT: [[LSHR:%.*]] = lshr i64 [[IV]], 1 -; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[LSHR]] -; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; VF4-NEXT: br label [[EXIT:%.*]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -221,7 +165,7 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-LABEL: define void @ld_lshr2_step1_start0_ind1 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF2-NEXT: entry: -; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF2-NEXT: br label [[VECTOR_PH:%.*]] ; VF2: vector.ph: ; VF2-NEXT: br label [[VECTOR_BODY:%.*]] ; VF2: vector.body: @@ -238,30 +182,16 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; VF2-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF2-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF2: middle.block: -; VF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF2: scalar.ph: -; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF2-NEXT: br label [[LOOP:%.*]] -; VF2: loop: -; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF2-NEXT: [[LSHR:%.*]] = lshr i64 [[IV]], 2 -; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[LSHR]] -; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; VF2-NEXT: br label [[EXIT:%.*]] ; VF2: exit: ; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_lshr2_step1_start0_ind1 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF4-NEXT: entry: -; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF4-NEXT: br label [[VECTOR_PH:%.*]] ; VF4: vector.ph: ; VF4-NEXT: br label [[VECTOR_BODY:%.*]] ; VF4: vector.body: @@ -278,23 +208,9 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP6]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; VF4-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF4-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF4: middle.block: -; VF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF4: scalar.ph: -; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF4-NEXT: br label [[LOOP:%.*]] -; VF4: loop: -; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF4-NEXT: [[LSHR:%.*]] = lshr i64 [[IV]], 2 -; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[LSHR]] -; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; VF4-NEXT: br label [[EXIT:%.*]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -320,7 +236,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-LABEL: define void @ld_lshr0_step2_start0_ind1 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF2-NEXT: entry: -; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF2-NEXT: br label [[VECTOR_PH:%.*]] ; VF2: vector.ph: ; VF2-NEXT: br label [[VECTOR_BODY:%.*]] ; VF2: vector.body: @@ -348,30 +264,16 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF2: middle.block: -; VF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF2: scalar.ph: -; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF2-NEXT: br label [[LOOP:%.*]] -; VF2: loop: -; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF2-NEXT: [[LSHR:%.*]] = lshr i64 [[IV]], 0 -; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[LSHR]] -; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 -; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; VF2-NEXT: br label [[EXIT:%.*]] ; VF2: exit: ; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_lshr0_step2_start0_ind1 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF4-NEXT: entry: -; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF4-NEXT: br label [[VECTOR_PH:%.*]] ; VF4: vector.ph: ; VF4-NEXT: br label [[VECTOR_BODY:%.*]] ; VF4: vector.body: @@ -415,23 +317,9 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF4: middle.block: -; VF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF4: scalar.ph: -; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF4-NEXT: br label [[LOOP:%.*]] -; VF4: loop: -; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF4-NEXT: [[LSHR:%.*]] = lshr i64 [[IV]], 0 -; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[LSHR]] -; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 -; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; VF4-NEXT: br label [[EXIT:%.*]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -457,7 +345,7 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-LABEL: define void @ld_lshr1_step2_start0_ind1 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF2-NEXT: entry: -; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF2-NEXT: br label [[VECTOR_PH:%.*]] ; VF2: vector.ph: ; VF2-NEXT: br label [[VECTOR_BODY:%.*]] ; VF2: vector.body: @@ -478,30 +366,16 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP9]], ptr [[TMP7]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VF2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF2: middle.block: -; VF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF2: scalar.ph: -; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF2-NEXT: br label [[LOOP:%.*]] -; VF2: loop: -; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF2-NEXT: [[LSHR:%.*]] = lshr i64 [[IV]], 1 -; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[LSHR]] -; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 -; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; VF2-NEXT: br label [[EXIT:%.*]] ; VF2: exit: ; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_lshr1_step2_start0_ind1 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF4-NEXT: entry: -; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF4-NEXT: br label [[VECTOR_PH:%.*]] ; VF4: vector.ph: ; VF4-NEXT: br label [[VECTOR_BODY:%.*]] ; VF4: vector.body: @@ -530,23 +404,9 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP15]], ptr [[TMP11]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF4-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VF4-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF4: middle.block: -; VF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF4: scalar.ph: -; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF4-NEXT: br label [[LOOP:%.*]] -; VF4: loop: -; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF4-NEXT: [[LSHR:%.*]] = lshr i64 [[IV]], 1 -; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[LSHR]] -; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 -; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; VF4-NEXT: br label [[EXIT:%.*]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -600,7 +460,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2: scalar.ph: @@ -616,7 +476,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] ; VF2: exit: ; VF2-NEXT: ret void ; @@ -667,7 +527,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF4: scalar.ph: @@ -683,7 +543,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -737,7 +597,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2: scalar.ph: @@ -753,7 +613,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] +; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] ; VF2: exit: ; VF2-NEXT: ret void ; @@ -804,7 +664,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF4: scalar.ph: @@ -820,7 +680,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] +; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -871,7 +731,7 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 -; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2: scalar.ph: @@ -887,7 +747,7 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]] +; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]] ; VF2: exit: ; VF2-NEXT: ret void ; @@ -926,7 +786,7 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 -; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF4: scalar.ph: @@ -942,7 +802,7 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]] +; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -990,7 +850,7 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP10]], ptr [[TMP8]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; VF2-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; VF2-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2: scalar.ph: @@ -1006,7 +866,7 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]] +; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]] ; VF2: exit: ; VF2-NEXT: ret void ; @@ -1043,7 +903,7 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP16]], ptr [[TMP12]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 -; VF4-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; VF4-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF4: scalar.ph: @@ -1059,7 +919,7 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]] +; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -1114,7 +974,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2: scalar.ph: @@ -1130,7 +990,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]] +; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP16:![0-9]+]] ; VF2: exit: ; VF2-NEXT: ret void ; @@ -1182,7 +1042,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF4: scalar.ph: @@ -1198,7 +1058,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]] +; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP16:![0-9]+]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -1253,7 +1113,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2: scalar.ph: @@ -1269,7 +1129,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]] +; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP18:![0-9]+]] ; VF2: exit: ; VF2-NEXT: ret void ; @@ -1321,7 +1181,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF4: scalar.ph: @@ -1337,7 +1197,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]] +; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP18:![0-9]+]] ; VF4: exit: ; VF4-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll index b8401caf56024..991b5b20ec79f 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll @@ -7,7 +7,7 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-LABEL: define void @ld_div1_step1_start0_ind2 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF2-NEXT: entry: -; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF2-NEXT: br label [[VECTOR_PH:%.*]] ; VF2: vector.ph: ; VF2-NEXT: br label [[VECTOR_BODY:%.*]] ; VF2: vector.body: @@ -36,33 +36,14 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF2: middle.block: -; VF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF2: scalar.ph: -; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; VF2-NEXT: br label [[LOOP:%.*]] -; VF2: loop: -; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ] -; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 1 -; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 1 -; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]] -; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]] -; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 -; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; VF2-NEXT: br label [[EXIT:%.*]] ; VF2: exit: ; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_div1_step1_start0_ind2 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF4-NEXT: entry: -; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF4-NEXT: br label [[VECTOR_PH:%.*]] ; VF4: vector.ph: ; VF4-NEXT: br label [[VECTOR_BODY:%.*]] ; VF4: vector.body: @@ -99,26 +80,7 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF4: middle.block: -; VF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF4: scalar.ph: -; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; VF4-NEXT: br label [[LOOP:%.*]] -; VF4: loop: -; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ] -; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 1 -; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 1 -; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]] -; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]] -; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 -; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; VF4-NEXT: br label [[EXIT:%.*]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -149,7 +111,7 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-LABEL: define void @ld_div2_step1_start0_ind2 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF2-NEXT: entry: -; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF2-NEXT: br label [[VECTOR_PH:%.*]] ; VF2: vector.ph: ; VF2-NEXT: br label [[VECTOR_BODY:%.*]] ; VF2: vector.body: @@ -169,35 +131,16 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP9]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; VF2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF2: middle.block: -; VF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF2: scalar.ph: -; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; VF2-NEXT: br label [[LOOP:%.*]] -; VF2: loop: -; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ] -; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 2 -; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 2 -; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]] -; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]] -; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 -; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; VF2-NEXT: br label [[EXIT:%.*]] ; VF2: exit: ; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_div2_step1_start0_ind2 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF4-NEXT: entry: -; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF4-NEXT: br label [[VECTOR_PH:%.*]] ; VF4: vector.ph: ; VF4-NEXT: br label [[VECTOR_BODY:%.*]] ; VF4: vector.body: @@ -232,28 +175,9 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF4: middle.block: -; VF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF4: scalar.ph: -; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; VF4-NEXT: br label [[LOOP:%.*]] -; VF4: loop: -; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ] -; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 2 -; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 2 -; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]] -; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]] -; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 -; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; VF4-NEXT: br label [[EXIT:%.*]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -283,7 +207,7 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-LABEL: define void @ld_div3_step1_start0_ind2 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF2-NEXT: entry: -; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF2-NEXT: br label [[VECTOR_PH:%.*]] ; VF2: vector.ph: ; VF2-NEXT: br label [[VECTOR_BODY:%.*]] ; VF2: vector.body: @@ -310,35 +234,16 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF2: middle.block: -; VF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF2: scalar.ph: -; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; VF2-NEXT: br label [[LOOP:%.*]] -; VF2: loop: -; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ] -; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 3 -; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 3 -; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]] -; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]] -; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 -; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; VF2-NEXT: br label [[EXIT:%.*]] ; VF2: exit: ; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_div3_step1_start0_ind2 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF4-NEXT: entry: -; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF4-NEXT: br label [[VECTOR_PH:%.*]] ; VF4: vector.ph: ; VF4-NEXT: br label [[VECTOR_BODY:%.*]] ; VF4: vector.body: @@ -373,28 +278,9 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF4: middle.block: -; VF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF4: scalar.ph: -; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; VF4-NEXT: br label [[LOOP:%.*]] -; VF4: loop: -; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ] -; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 3 -; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 3 -; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]] -; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]] -; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 -; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; VF4-NEXT: br label [[EXIT:%.*]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -424,7 +310,7 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-LABEL: define void @ld_div1_step2_start0_ind2 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF2-NEXT: entry: -; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF2-NEXT: br label [[VECTOR_PH:%.*]] ; VF2: vector.ph: ; VF2-NEXT: br label [[VECTOR_BODY:%.*]] ; VF2: vector.body: @@ -456,35 +342,16 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF2: middle.block: -; VF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF2: scalar.ph: -; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 500, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; VF2-NEXT: br label [[LOOP:%.*]] -; VF2: loop: -; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ] -; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 1 -; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 1 -; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]] -; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]] -; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 -; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 -; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; VF2-NEXT: br label [[EXIT:%.*]] ; VF2: exit: ; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_div1_step2_start0_ind2 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF4-NEXT: entry: -; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF4-NEXT: br label [[VECTOR_PH:%.*]] ; VF4: vector.ph: ; VF4-NEXT: br label [[VECTOR_BODY:%.*]] ; VF4: vector.body: @@ -532,28 +399,9 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF4: middle.block: -; VF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF4: scalar.ph: -; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 500, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; VF4-NEXT: br label [[LOOP:%.*]] -; VF4: loop: -; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ] -; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 1 -; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 1 -; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]] -; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]] -; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 -; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 -; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; VF4-NEXT: br label [[EXIT:%.*]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -583,7 +431,7 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-LABEL: define void @ld_div2_step2_start0_ind2 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF2-NEXT: entry: -; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF2-NEXT: br label [[VECTOR_PH:%.*]] ; VF2: vector.ph: ; VF2-NEXT: br label [[VECTOR_BODY:%.*]] ; VF2: vector.body: @@ -615,35 +463,16 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF2: middle.block: -; VF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF2: scalar.ph: -; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 500, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; VF2-NEXT: br label [[LOOP:%.*]] -; VF2: loop: -; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ] -; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 2 -; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 2 -; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]] -; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]] -; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 -; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 -; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; VF2-NEXT: br label [[EXIT:%.*]] ; VF2: exit: ; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_div2_step2_start0_ind2 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF4-NEXT: entry: -; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF4-NEXT: br label [[VECTOR_PH:%.*]] ; VF4: vector.ph: ; VF4-NEXT: br label [[VECTOR_BODY:%.*]] ; VF4: vector.body: @@ -691,28 +520,9 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF4: middle.block: -; VF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF4: scalar.ph: -; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 500, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; VF4-NEXT: br label [[LOOP:%.*]] -; VF4: loop: -; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ] -; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 2 -; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 2 -; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]] -; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]] -; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 -; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 -; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; VF4-NEXT: br label [[EXIT:%.*]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -742,7 +552,7 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-LABEL: define void @ld_div3_step2_start0_ind2 ; VF2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF2-NEXT: entry: -; VF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF2-NEXT: br label [[VECTOR_PH:%.*]] ; VF2: vector.ph: ; VF2-NEXT: br label [[VECTOR_BODY:%.*]] ; VF2: vector.body: @@ -774,35 +584,16 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF2: middle.block: -; VF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF2: scalar.ph: -; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 500, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; VF2-NEXT: br label [[LOOP:%.*]] -; VF2: loop: -; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF2-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ] -; VF2-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 3 -; VF2-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 3 -; VF2-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]] -; VF2-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]] -; VF2-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF2-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF2-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF2-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 -; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 -; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; VF2-NEXT: br label [[EXIT:%.*]] ; VF2: exit: ; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_div3_step2_start0_ind2 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { ; VF4-NEXT: entry: -; VF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF4-NEXT: br label [[VECTOR_PH:%.*]] ; VF4: vector.ph: ; VF4-NEXT: br label [[VECTOR_BODY:%.*]] ; VF4: vector.body: @@ -850,28 +641,9 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF4: middle.block: -; VF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; VF4: scalar.ph: -; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 500, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; VF4-NEXT: br label [[LOOP:%.*]] -; VF4: loop: -; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; VF4-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ] -; VF4-NEXT: [[DIV1:%.*]] = udiv i64 [[IV]], 3 -; VF4-NEXT: [[DIV2:%.*]] = udiv i64 [[IV2]], 3 -; VF4-NEXT: [[ADD:%.*]] = add i64 [[DIV1]], [[DIV2]] -; VF4-NEXT: [[GEP_LD:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[ADD]] -; VF4-NEXT: [[LD:%.*]] = load i64, ptr [[GEP_LD]], align 8 -; VF4-NEXT: [[CALC:%.*]] = add nsw i64 [[LD]], 42 -; VF4-NEXT: [[GEP_ST:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; VF4-NEXT: store i64 [[CALC]], ptr [[GEP_ST]], align 8 -; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 -; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 -; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; VF4-NEXT: br label [[EXIT:%.*]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -933,7 +705,7 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2: scalar.ph: @@ -954,7 +726,7 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] +; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; VF2: exit: ; VF2-NEXT: ret void ; @@ -1009,7 +781,7 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF4: scalar.ph: @@ -1030,7 +802,7 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] +; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -1092,7 +864,7 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2: scalar.ph: @@ -1113,7 +885,7 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]] +; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] ; VF2: exit: ; VF2-NEXT: ret void ; @@ -1168,7 +940,7 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF4: scalar.ph: @@ -1189,7 +961,7 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]] +; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -1251,7 +1023,7 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2: scalar.ph: @@ -1272,7 +1044,7 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]] +; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] ; VF2: exit: ; VF2-NEXT: ret void ; @@ -1327,7 +1099,7 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF4: scalar.ph: @@ -1348,7 +1120,7 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]] +; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -1406,7 +1178,7 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 -; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2: scalar.ph: @@ -1427,7 +1199,7 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]] +; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] ; VF2: exit: ; VF2-NEXT: ret void ; @@ -1470,7 +1242,7 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 -; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF4: scalar.ph: @@ -1491,7 +1263,7 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]] +; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -1549,7 +1321,7 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 -; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2: scalar.ph: @@ -1570,7 +1342,7 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]] +; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]] ; VF2: exit: ; VF2-NEXT: ret void ; @@ -1613,7 +1385,7 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 -; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF4: scalar.ph: @@ -1634,7 +1406,7 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]] +; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -1692,7 +1464,7 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 -; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2: scalar.ph: @@ -1713,7 +1485,7 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]] +; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]] ; VF2: exit: ; VF2-NEXT: ret void ; @@ -1756,7 +1528,7 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 -; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF4: scalar.ph: @@ -1777,7 +1549,7 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]] +; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -1840,7 +1612,7 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2: scalar.ph: @@ -1861,7 +1633,7 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]] +; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]] ; VF2: exit: ; VF2-NEXT: ret void ; @@ -1917,7 +1689,7 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 -; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF4: scalar.ph: @@ -1938,7 +1710,7 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]] +; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -2001,7 +1773,7 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2: scalar.ph: @@ -2022,7 +1794,7 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]] +; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]] ; VF2: exit: ; VF2-NEXT: ret void ; @@ -2078,7 +1850,7 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 -; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF4: scalar.ph: @@ -2099,7 +1871,7 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]] +; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -2162,7 +1934,7 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2: scalar.ph: @@ -2183,7 +1955,7 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]] +; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]] ; VF2: exit: ; VF2-NEXT: ret void ; @@ -2239,7 +2011,7 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 -; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF4: scalar.ph: @@ -2260,7 +2032,7 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]] +; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -2323,7 +2095,7 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2: scalar.ph: @@ -2344,7 +2116,7 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]] +; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]] ; VF2: exit: ; VF2-NEXT: ret void ; @@ -2400,7 +2172,7 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF4: scalar.ph: @@ -2421,7 +2193,7 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]] +; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -2484,7 +2256,7 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2: scalar.ph: @@ -2505,7 +2277,7 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP35:![0-9]+]] +; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]] ; VF2: exit: ; VF2-NEXT: ret void ; @@ -2561,7 +2333,7 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF4: scalar.ph: @@ -2582,7 +2354,7 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP35:![0-9]+]] +; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]] ; VF4: exit: ; VF4-NEXT: ret void ; @@ -2645,7 +2417,7 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF2: scalar.ph: @@ -2666,7 +2438,7 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF2-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP37:![0-9]+]] +; VF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]] ; VF2: exit: ; VF2-NEXT: ret void ; @@ -2722,7 +2494,7 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF4: scalar.ph: @@ -2743,7 +2515,7 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[IV2_NEXT]] = add nsw i64 [[IV2]], 1 ; VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 ; VF4-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP37:![0-9]+]] +; VF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]] ; VF4: exit: ; VF4-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll index 31021d5244f32..38ad06ad5d935 100644 --- a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll +++ b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll @@ -5,7 +5,7 @@ define void @pr63340(ptr %A, ptr %B) { ; CHECK-LABEL: define void @pr63340 ; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -22,21 +22,7 @@ define void @pr63340(ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ -128, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[F_0_I:%.*]] = phi ptr [ [[A]], [[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[F_0_I]], i64 1 -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds ptr, ptr [[B]], i8 [[IV]] -; CHECK-NEXT: store ptr [[GEP]], ptr [[GEP_B]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[IV_NEXT]], -128 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ;