Skip to content

Commit 641c647

Browse files
committed
!fixup address latest comments, thanks!
1 parent 2fd15a4 commit 641c647

13 files changed

+624
-267
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2660,6 +2660,9 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
26602660
LoopScalarPreHeader =
26612661
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
26622662
LI, nullptr, Twine(Prefix) + "scalar.ph");
2663+
// NOTE: The Plan's scalar preheader isn't replaced with a VPIRBasicBlock
2664+
// wrapping LoopScalarPreHeader here at the moment, because the Plan's scalar
2665+
// preheader may be unreachable at this point.
26632666
}
26642667

26652668
/// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
@@ -8021,6 +8024,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
80218024
Phi->removeIncomingValue(EPI.MemSafetyCheck);
80228025
}
80238026

8027+
replaceVPBBWithIRVPBB(Plan.getScalarPreheader(), LoopScalarPreHeader);
80248028
return LoopVectorPreHeader;
80258029
}
80268030

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3498,12 +3498,14 @@ class VPlan {
34983498
/// latch. If the scalar tail loop or exit block are known to always execute,
34993499
/// the middle block may branch directly to the block.
35003500
VPBasicBlock *getMiddleBlock() {
3501-
if (!getScalarPreheader()->getPredecessors().empty())
3502-
return cast<VPBasicBlock>(
3503-
getScalarPreheader()->getPredecessors().front());
3504-
if (getExitBlocks().size() == 1)
3505-
return cast<VPBasicBlock>(getExitBlocks()[0]->getPredecessors().front());
3506-
return nullptr;
3501+
VPRegionBlock *LoopRegion = getVectorLoopRegion();
3502+
if (!LoopRegion)
3503+
return nullptr;
3504+
auto *RegionSucc = LoopRegion->getSingleSuccessor();
3505+
if (RegionSucc->getSingleSuccessor() ||
3506+
is_contained(RegionSucc->getSuccessors(), getScalarPreheader()))
3507+
return cast<VPBasicBlock>(RegionSucc);
3508+
return cast<VPBasicBlock>(RegionSucc->getSuccessors()[1]);
35073509
}
35083510
const VPBasicBlock *getMiddleBlock() const {
35093511
return const_cast<VPlan *>(this)->getMiddleBlock();

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1473,19 +1473,22 @@ void VPlanTransforms::truncateToMinimalBitwidths(
14731473

14741474
/// Remove BranchOnCond recipes with true conditions together with removing
14751475
/// dead edges to their successors.
1476-
static void simplifyCFG(VPlan &Plan) {
1476+
static void simplifyBranchOnCondTrue(VPlan &Plan) {
14771477
using namespace llvm::VPlanPatternMatch;
14781478
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
1479-
vp_depth_first_deep(Plan.getEntry()))) {
1479+
vp_depth_first_shallow(Plan.getEntry()))) {
14801480
if (VPBB->getNumSuccessors() != 2 ||
14811481
!match(&VPBB->back(), m_BranchOnCond(m_True())))
14821482
continue;
14831483

14841484
VPBasicBlock *RemovedSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
14851485
const auto &Preds = RemovedSucc->getPredecessors();
1486+
assert(count(Preds, VPBB) == 1 &&
1487+
"There must be a single edge between VPBB and its successor");
14861488
unsigned DeadIdx = std::distance(Preds.begin(), find(Preds, VPBB));
14871489

1488-
// Remove values coming from VPBB from phi-like recipes in RemovedSucc.
1490+
// Values coming from VPBB into ResumePhi recipes of RemovedSucc are removed
1491+
// from these recipes.
14891492
for (VPRecipeBase &R : make_early_inc_range(*RemovedSucc)) {
14901493
assert((!isa<VPIRInstruction>(&R) ||
14911494
!isa<PHINode>(cast<VPIRInstruction>(&R)->getInstruction())) &&
@@ -1495,20 +1498,20 @@ static void simplifyCFG(VPlan &Plan) {
14951498
if (!VPI || VPI->getOpcode() != VPInstruction::ResumePhi)
14961499
break;
14971500
VPBuilder B(VPI);
1498-
SmallVector<VPValue *> NewOps;
1501+
SmallVector<VPValue *> NewOperands;
14991502
// Create new operand list, with the dead incoming value filtered out.
15001503
for (const auto &[Idx, Op] : enumerate(VPI->operands())) {
15011504
if (Idx == DeadIdx)
15021505
continue;
1503-
NewOps.push_back(Op);
1506+
NewOperands.push_back(Op);
15041507
}
1505-
VPI->replaceAllUsesWith(B.createNaryOp(VPInstruction::ResumePhi, NewOps,
1506-
VPI->getDebugLoc(),
1508+
VPI->replaceAllUsesWith(B.createNaryOp(VPInstruction::ResumePhi,
1509+
NewOperands, VPI->getDebugLoc(),
15071510
VPI->getName()));
15081511
VPI->eraseFromParent();
15091512
}
15101513
// Disconnect blocks and remove the terminator. RemovedSucc will be deleted
1511-
// automatically on VPlan destruction.
1514+
// automatically on VPlan destruction if it becomes unreachable.
15121515
VPBlockUtils::disconnectBlocks(VPBB, RemovedSucc);
15131516
VPBB->back().eraseFromParent();
15141517
}
@@ -1523,7 +1526,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
15231526
runPass(legalizeAndOptimizeInductions, Plan);
15241527
runPass(removeRedundantExpandSCEVRecipes, Plan);
15251528
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
1526-
runPass(simplifyCFG, Plan);
1529+
runPass(simplifyBranchOnCondTrue, Plan);
15271530
runPass(removeDeadRecipes, Plan);
15281531

15291532
runPass(createAndOptimizeReplicateRegions, Plan);

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 71 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -460,12 +460,18 @@ define void @latch_branch_cost(ptr %dst) {
460460
; PRED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 104
461461
; PRED-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
462462
; PRED: [[MIDDLE_BLOCK]]:
463-
; PRED-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
463+
; PRED-NEXT: br label %[[EXIT:.*]]
464464
; PRED: [[SCALAR_PH]]:
465-
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 104, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
465+
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
466466
; PRED-NEXT: br label %[[LOOP:.*]]
467467
; PRED: [[LOOP]]:
468-
; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ]
468+
; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
469+
; PRED-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]]
470+
; PRED-NEXT: store i8 0, ptr [[GEP]], align 1
471+
; PRED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
472+
; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 100
473+
; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
474+
; PRED: [[EXIT]]:
469475
; PRED-NEXT: ret void
470476
;
471477
entry:
@@ -606,6 +612,10 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
606612
;
607613
; PRED-LABEL: define i32 @header_mask_and_invariant_compare(
608614
; PRED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], ptr [[E:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
615+
; PRED-NEXT: [[ENTRY:.*]]:
616+
; PRED-NEXT: br label %[[LOOP_HEADER:.*]]
617+
; PRED: [[LOOP_HEADER]]:
618+
; PRED-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
609619
; PRED-NEXT: [[L_A:%.*]] = load i32, ptr [[A]], align 4
610620
; PRED-NEXT: [[L_B:%.*]] = load i32, ptr [[B]], align 4
611621
; PRED-NEXT: [[OR:%.*]] = or i32 [[L_B]], [[L_A]]
@@ -732,6 +742,16 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
732742
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP10]])
733743
; PRED-NEXT: [[TMP16:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
734744
; PRED-NEXT: [[TMP17:%.*]] = extractelement <vscale x 2 x i1> [[TMP16]], i32 0
745+
; PRED-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
746+
; PRED: [[MIDDLE_BLOCK]]:
747+
; PRED-NEXT: br label %[[EXIT:.*]]
748+
; PRED: [[SCALAR_PH]]:
749+
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[DST]], %[[ENTRY]] ]
750+
; PRED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
751+
; PRED-NEXT: br label %[[LOOP:.*]]
752+
; PRED: [[LOOP]]:
753+
; PRED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ]
754+
; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
735755
; PRED-NEXT: [[L:%.*]] = load i16, ptr [[SRC]], align 2
736756
; PRED-NEXT: [[O:%.*]] = or i16 [[L]], 1
737757
; PRED-NEXT: [[CONV:%.*]] = uitofp i16 [[O]] to double
@@ -850,6 +870,14 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
850870
; DEFAULT: [[PRED_STORE_CONTINUE14]]:
851871
; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
852872
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
873+
; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
874+
; DEFAULT: [[MIDDLE_BLOCK]]:
875+
; DEFAULT-NEXT: br label %[[EXIT:.*]]
876+
; DEFAULT: [[SCALAR_PH]]:
877+
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
878+
; DEFAULT-NEXT: br label %[[LOOP:.*]]
879+
; DEFAULT: [[LOOP]]:
880+
; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
853881
; DEFAULT-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i8
854882
; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]]
855883
; DEFAULT-NEXT: store i8 [[IV_TRUNC]], ptr [[GEP]], align 1
@@ -944,6 +972,14 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
944972
; PRED: [[PRED_STORE_CONTINUE14]]:
945973
; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
946974
; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
975+
; PRED-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
976+
; PRED: [[MIDDLE_BLOCK]]:
977+
; PRED-NEXT: br label %[[EXIT:.*]]
978+
; PRED: [[SCALAR_PH]]:
979+
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
980+
; PRED-NEXT: br label %[[LOOP:.*]]
981+
; PRED: [[LOOP]]:
982+
; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
947983
; PRED-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i8
948984
; PRED-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]]
949985
; PRED-NEXT: store i8 [[IV_TRUNC]], ptr [[GEP]], align 1
@@ -1373,6 +1409,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
13731409
; PRED-NEXT: [[TMP84:%.*]] = xor <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
13741410
; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
13751411
; PRED-NEXT: [[TMP85:%.*]] = extractelement <8 x i1> [[TMP84]], i32 0
1412+
; PRED-NEXT: br i1 [[TMP85]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
1413+
; PRED: [[MIDDLE_BLOCK]]:
1414+
; PRED-NEXT: br label %[[EXIT:.*]]
1415+
; PRED: [[SCALAR_PH]]:
1416+
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
1417+
; PRED-NEXT: br label %[[LOOP_HEADER:.*]]
1418+
; PRED: [[LOOP_HEADER]]:
1419+
; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
13761420
; PRED-NEXT: [[TMP86:%.*]] = load float, ptr [[SRC_1]], align 4
13771421
; PRED-NEXT: [[TMP87:%.*]] = load float, ptr [[SRC_2]], align 4
13781422
; PRED-NEXT: [[MUL8_I_US:%.*]] = fmul float [[TMP87]], 0.000000e+00
@@ -1478,6 +1522,18 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize {
14781522
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
14791523
; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
14801524
; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
1525+
; DEFAULT-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
1526+
; DEFAULT: [[MIDDLE_BLOCK]]:
1527+
; DEFAULT-NEXT: br label %[[EXIT:.*]]
1528+
; DEFAULT: [[SCALAR_PH]]:
1529+
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
1530+
; DEFAULT-NEXT: br label %[[LOOP_HEADER:.*]]
1531+
; DEFAULT: [[LOOP_HEADER]]:
1532+
; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
1533+
; DEFAULT-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[THEN:.*]]
1534+
; DEFAULT: [[THEN]]:
1535+
; DEFAULT-NEXT: br label %[[LOOP_LATCH]]
1536+
; DEFAULT: [[LOOP_LATCH]]:
14811537
; DEFAULT-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
14821538
; DEFAULT-NEXT: [[T:%.*]] = trunc nuw nsw i64 [[IV_NEXT]] to i32
14831539
; DEFAULT-NEXT: store i32 [[T]], ptr [[DST]], align 4
@@ -1529,6 +1585,18 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize {
15291585
; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
15301586
; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
15311587
; PRED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
1588+
; PRED-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
1589+
; PRED: [[MIDDLE_BLOCK]]:
1590+
; PRED-NEXT: br label %[[EXIT:.*]]
1591+
; PRED: [[SCALAR_PH]]:
1592+
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
1593+
; PRED-NEXT: br label %[[LOOP_HEADER:.*]]
1594+
; PRED: [[LOOP_HEADER]]:
1595+
; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
1596+
; PRED-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[THEN:.*]]
1597+
; PRED: [[THEN]]:
1598+
; PRED-NEXT: br label %[[LOOP_LATCH]]
1599+
; PRED: [[LOOP_LATCH]]:
15321600
; PRED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
15331601
; PRED-NEXT: [[T:%.*]] = trunc nuw nsw i64 [[IV_NEXT]] to i32
15341602
; PRED-NEXT: store i32 [[T]], ptr [[DST]], align 4

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,17 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
139139
; PRED-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP26]], i32 0
140140
; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP23]], ptr [[TMP27]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
141141
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
142+
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP15]])
143+
; PRED-NEXT: [[TMP28:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
144+
; PRED-NEXT: [[TMP29:%.*]] = extractelement <vscale x 16 x i1> [[TMP28]], i32 0
145+
; PRED-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
146+
; PRED: [[MIDDLE_BLOCK]]:
147+
; PRED-NEXT: br label %[[EXIT:.*]]
148+
; PRED: [[SCALAR_PH]]:
149+
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
150+
; PRED-NEXT: br label %[[LOOP:.*]]
151+
; PRED: [[LOOP]]:
152+
; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
142153
; PRED-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV]]
143154
; PRED-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1
144155
; PRED-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32
@@ -303,6 +314,14 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 {
303314
; PRED-NEXT: [[TMP24:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
304315
; PRED-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
305316
; PRED-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP24]], i32 0
317+
; PRED-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
318+
; PRED: [[MIDDLE_BLOCK]]:
319+
; PRED-NEXT: br label %[[EXIT:.*]]
320+
; PRED: [[SCALAR_PH]]:
321+
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
322+
; PRED-NEXT: br label %[[FOR_BODY:.*]]
323+
; PRED: [[FOR_BODY]]:
324+
; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
306325
; PRED-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[IV]] to i32
307326
; PRED-NEXT: [[ADD_I:%.*]] = mul i32 [[MUL_X]], [[TRUNC_IV]]
308327
; PRED-NEXT: [[IV_MUL:%.*]] = zext i32 [[ADD_I]] to i64
@@ -489,6 +508,16 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 {
489508
; PRED-NEXT: [[TMP35:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
490509
; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
491510
; PRED-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP35]], i32 0
511+
; PRED-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
512+
; PRED: [[MIDDLE_BLOCK]]:
513+
; PRED-NEXT: br label %[[EXIT:.*]]
514+
; PRED: [[SCALAR_PH]]:
515+
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
516+
; PRED-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
517+
; PRED-NEXT: br label %[[LOOP:.*]]
518+
; PRED: [[LOOP]]:
519+
; PRED-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
520+
; PRED-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL8]], %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ]
492521
; PRED-NEXT: [[IV_1_TRUNC:%.*]] = trunc i64 [[IV_1]] to i32
493522
; PRED-NEXT: [[IV_1_MUL:%.*]] = mul i32 [[MUL]], [[IV_1_TRUNC]]
494523
; PRED-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1
@@ -675,6 +704,16 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 {
675704
; PRED-NEXT: [[TMP34:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
676705
; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
677706
; PRED-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP34]], i32 0
707+
; PRED-NEXT: br i1 [[TMP35]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
708+
; PRED: [[MIDDLE_BLOCK]]:
709+
; PRED-NEXT: br label %[[EXIT:.*]]
710+
; PRED: [[SCALAR_PH]]:
711+
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
712+
; PRED-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
713+
; PRED-NEXT: br label %[[LOOP:.*]]
714+
; PRED: [[LOOP]]:
715+
; PRED-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
716+
; PRED-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL7]], %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ]
678717
; PRED-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV_1]] to i32
679718
; PRED-NEXT: [[IV_MUL:%.*]] = mul i32 [[ADD]], [[IV_TRUNC]]
680719
; PRED-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1
@@ -806,6 +845,16 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
806845
; PRED: [[PRED_STORE_CONTINUE5]]:
807846
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
808847
; PRED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
848+
; PRED-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
849+
; PRED: [[MIDDLE_BLOCK]]:
850+
; PRED-NEXT: br label %[[EXIT:.*]]
851+
; PRED: [[SCALAR_PH]]:
852+
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
853+
; PRED-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
854+
; PRED-NEXT: br label %[[LOOP:.*]]
855+
; PRED: [[LOOP]]:
856+
; PRED-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
857+
; PRED-NEXT: [[IV_CONV:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], %[[SCALAR_PH]] ], [ [[IV_EXT:%.*]], %[[LOOP]] ]
809858
; PRED-NEXT: [[GEP:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[IV_CONV]], i32 2
810859
; PRED-NEXT: store i32 0, ptr [[GEP]], align 8
811860
; PRED-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1

0 commit comments

Comments
 (0)