Skip to content

Commit b8e1bcb

Browse files
committed
Fix dominator tree mismatch
1 parent 5386786 commit b8e1bcb

File tree

5 files changed

+151
-62
lines changed

5 files changed

+151
-62
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,11 @@ class InnerLoopVectorizer {
560560
/// Introduces a new VPIRBasicBlock for \p CheckIRBB to Plan between the
561561
/// vector preheader and its predecessor, also connecting the new block to the
562562
/// scalar preheader.
563-
void introduceCheckBlockInVPlan(BasicBlock *CheckIRBB);
563+
/// If \p HasAliasMask is true, then the vector loop will be branched to
564+
/// unconditionally, instead of there being a conditional branch to the scalar
565+
/// loop or vector loop.
566+
void introduceCheckBlockInVPlan(BasicBlock *CheckIRBB,
567+
bool HasAliasMask = false);
564568

565569
/// The original loop.
566570
Loop *OrigLoop;
@@ -1882,11 +1886,9 @@ class GeneratedRTChecks {
18821886

18831887
const auto &RtPtrChecking = *LAI.getRuntimePointerChecking();
18841888
if (RtPtrChecking.Need) {
1885-
if (!MemCheckBlock) {
1886-
auto *Pred = SCEVCheckBlock ? SCEVCheckBlock : Preheader;
1887-
MemCheckBlock = SplitBlock(Pred, Pred->getTerminator(), DT, LI, nullptr,
1888-
"vector.memcheck");
1889-
}
1889+
auto *Pred = SCEVCheckBlock ? SCEVCheckBlock : Preheader;
1890+
MemCheckBlock = SplitBlock(Pred, Pred->getTerminator(), DT, LI, nullptr,
1891+
"vector.memcheck");
18901892

18911893
auto DiffChecks = RtPtrChecking.getDiffChecks();
18921894
if (DiffChecks) {
@@ -2395,7 +2397,8 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
23952397
return VectorTripCount;
23962398
}
23972399

2398-
void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {
2400+
void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB,
2401+
bool HasAliasMask) {
23992402
VPBlockBase *ScalarPH = Plan.getScalarPreheader();
24002403
VPBlockBase *PreVectorPH = VectorPHVPB->getSinglePredecessor();
24012404
if (PreVectorPH->getNumSuccessors() != 1) {
@@ -2406,17 +2409,20 @@ void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {
24062409
VPBlockUtils::insertOnEdge(PreVectorPH, VectorPHVPB, CheckVPIRBB);
24072410
PreVectorPH = CheckVPIRBB;
24082411
}
2409-
VPBlockUtils::connectBlocks(PreVectorPH, ScalarPH);
2410-
PreVectorPH->swapSuccessors();
2412+
if (!HasAliasMask) {
2413+
VPBlockUtils::connectBlocks(PreVectorPH, ScalarPH);
2414+
PreVectorPH->swapSuccessors();
24112415

2412-
// We just connected a new block to the scalar preheader. Update all
2413-
// ResumePhis by adding an incoming value for it, replicating the last value.
2414-
for (VPRecipeBase &R : *cast<VPBasicBlock>(ScalarPH)) {
2415-
auto *ResumePhi = dyn_cast<VPInstruction>(&R);
2416-
if (!ResumePhi || ResumePhi->getOpcode() != VPInstruction::ResumePhi)
2417-
continue;
2418-
ResumePhi->addOperand(
2419-
ResumePhi->getOperand(ResumePhi->getNumOperands() - 1));
2416+
// We just connected a new block to the scalar preheader. Update all
2417+
// ResumePhis by adding an incoming value for it, replicating the last
2418+
// value.
2419+
for (VPRecipeBase &R : *cast<VPBasicBlock>(ScalarPH)) {
2420+
auto *ResumePhi = dyn_cast<VPInstruction>(&R);
2421+
if (!ResumePhi || ResumePhi->getOpcode() != VPInstruction::ResumePhi)
2422+
continue;
2423+
ResumePhi->addOperand(
2424+
ResumePhi->getOperand(ResumePhi->getNumOperands() - 1));
2425+
}
24202426
}
24212427
}
24222428

@@ -2555,7 +2561,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
25552561

25562562
AddedSafetyChecks = true;
25572563

2558-
introduceCheckBlockInVPlan(MemCheckBlock);
2564+
introduceCheckBlockInVPlan(MemCheckBlock, RTChecks.HasAliasMask);
25592565
return MemCheckBlock;
25602566
}
25612567

llvm/test/Transforms/LoopVectorize/AArch64/alias_mask.ll

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,15 @@ define dso_local void @alias_mask(ptr noalias %a, ptr %b, ptr %c, i64 %n) {
2828
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
2929
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
3030
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 16
31-
; CHECK-NEXT: [[ALIAS_LANE_MASK:%.*]] = call <vscale x 16 x i1> @llvm.experimental.get.alias.lane.mask.nxv16i1.i64.i64(i64 [[B2]], i64 [[C1]], i64 1, i1 true)
31+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i64> poison, i64 [[B2]], i64 0
32+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i64> poison, <vscale x 16 x i32> zeroinitializer
33+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 16 x i64> poison, i64 [[C1]], i64 0
34+
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 16 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 16 x i64> poison, <vscale x 16 x i32> zeroinitializer
35+
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <vscale x 16 x i64> [[BROADCAST_SPLAT6]], i32 0
36+
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <vscale x 16 x i64> [[BROADCAST_SPLAT]], i32 0
37+
; CHECK-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP29]] to ptr
38+
; CHECK-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP14]] to ptr
39+
; CHECK-NEXT: [[ALIAS_LANE_MASK:%.*]] = call <vscale x 16 x i1> @llvm.experimental.loop.dependence.war.mask.nxv16i1(ptr [[TMP30]], ptr [[TMP31]], i64 1)
3240
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
3341
; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP8]], 16
3442
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[TMP15]]
@@ -39,24 +47,23 @@ define dso_local void @alias_mask(ptr noalias %a, ptr %b, ptr %c, i64 %n) {
3947
; CHECK: vector.body:
4048
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4149
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
42-
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 0
4350
; CHECK-NEXT: [[TMP25:%.*]] = and <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], [[ALIAS_LANE_MASK]]
44-
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP14]]
51+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
4552
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
4653
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP17]], i32 1, <vscale x 16 x i1> [[TMP25]], <vscale x 16 x i8> poison)
47-
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP14]]
54+
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]]
4855
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 0
4956
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP19]], i32 1, <vscale x 16 x i1> [[TMP25]], <vscale x 16 x i8> poison)
5057
; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 16 x i8> [[WIDE_MASKED_LOAD5]], [[WIDE_MASKED_LOAD]]
51-
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[TMP14]]
58+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[INDEX]]
5259
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP21]], i32 0
5360
; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP20]], ptr [[TMP22]], i32 1, <vscale x 16 x i1> [[TMP25]])
5461
; CHECK-NEXT: [[TMP28:%.*]] = zext <vscale x 16 x i1> [[ALIAS_LANE_MASK]] to <vscale x 16 x i8>
5562
; CHECK-NEXT: [[TMP23:%.*]] = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> [[TMP28]])
5663
; CHECK-NEXT: [[TMP24:%.*]] = zext i8 [[TMP23]] to i64
5764
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP24]]
5865
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP13]])
59-
; CHECK-NEXT: [[TMP26:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer)
66+
; CHECK-NEXT: [[TMP26:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
6067
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <vscale x 16 x i1> [[TMP26]], i32 0
6168
; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
6269
;
@@ -109,7 +116,15 @@ define i32 @alias_mask_read_after_write(ptr noalias %a, ptr %b, ptr %c, i64 %n)
109116
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
110117
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
111118
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
112-
; CHECK-NEXT: [[ALIAS_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.get.alias.lane.mask.nxv4i1.i64.i64(i64 [[C2]], i64 [[B1]], i64 4, i1 false)
119+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[C2]], i64 0
120+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
121+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[B1]], i64 0
122+
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
123+
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i64> [[BROADCAST_SPLAT6]], i32 0
124+
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <vscale x 4 x i64> [[BROADCAST_SPLAT]], i32 0
125+
; CHECK-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP28]] to ptr
126+
; CHECK-NEXT: [[TMP35:%.*]] = inttoptr i64 [[TMP15]] to ptr
127+
; CHECK-NEXT: [[ALIAS_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.loop.dependence.raw.mask.nxv4i1(ptr [[TMP34]], ptr [[TMP35]], i64 4)
113128
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
114129
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP9]], 4
115130
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[N]], [[TMP16]]
@@ -121,15 +136,14 @@ define i32 @alias_mask_read_after_write(ptr noalias %a, ptr %b, ptr %c, i64 %n)
121136
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
122137
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
123138
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ]
124-
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 0
125139
; CHECK-NEXT: [[TMP31:%.*]] = and <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], [[ALIAS_LANE_MASK]]
126-
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]]
140+
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
127141
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
128142
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP18]], i32 2, <vscale x 4 x i1> [[TMP31]], <vscale x 4 x i32> poison)
129-
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP15]]
143+
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
130144
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0
131145
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[WIDE_MASKED_LOAD]], ptr [[TMP20]], i32 2, <vscale x 4 x i1> [[TMP31]])
132-
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP15]]
146+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
133147
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 0
134148
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP22]], i32 2, <vscale x 4 x i1> [[TMP31]], <vscale x 4 x i32> poison)
135149
; CHECK-NEXT: [[TMP23:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
@@ -140,7 +154,7 @@ define i32 @alias_mask_read_after_write(ptr noalias %a, ptr %b, ptr %c, i64 %n)
140154
; CHECK-NEXT: [[TMP27:%.*]] = zext i8 [[TMP26]] to i64
141155
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP27]]
142156
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP14]])
143-
; CHECK-NEXT: [[TMP29:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
157+
; CHECK-NEXT: [[TMP29:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
144158
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <vscale x 4 x i1> [[TMP29]], i32 0
145159
; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
146160
;

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,8 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
103103
; PRED-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 16
104104
; PRED-NEXT: [[TMP3:%.*]] = sub i64 [[DST2]], [[SRC3]]
105105
; PRED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
106-
; PRED-NEXT: br label %[[SCALAR_PH]]
107-
; PRED: [[VECTOR_PH:.*]]:
106+
; PRED-NEXT: br label %[[VECTOR_PH:.*]]
107+
; PRED: [[VECTOR_PH]]:
108108
; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
109109
; PRED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16
110110
; PRED-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
@@ -159,7 +159,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
159159
; PRED: [[MIDDLE_BLOCK]]:
160160
; PRED-NEXT: br label %[[EXIT:.*]]
161161
; PRED: [[SCALAR_PH]]:
162-
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
162+
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
163163
; PRED-NEXT: br label %[[LOOP:.*]]
164164
; PRED: [[LOOP]]:
165165
; PRED-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
; REQUIRES: asserts
2+
; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -force-vector-width=4 -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -force-tail-folding-style=data-and-control -disable-output %s 2>&1 | FileCheck %s
3+
4+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
5+
6+
; Tests for printing predicated VPlans.
7+
8+
define dso_local void @alias_mask(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
9+
; CHECK-LABEL: 'alias_mask'
10+
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
11+
; CHECK-NEXT: Live-in vp<%0> = VF
12+
; CHECK-NEXT: vp<%3> = original trip-count
13+
; CHECK-EMPTY:
14+
; CHECK-NEXT: ir-bb<for.body.preheader>:
15+
; CHECK-NEXT: IR %wide.trip.count = zext nneg i32 %n to i64
16+
; CHECK-NEXT: EMIT vp<%3> = EXPAND SCEV (zext i32 %n to i64)
17+
; CHECK-NEXT: EMIT vp<%4> = EXPAND SCEV (ptrtoint ptr %c to i64)
18+
; CHECK-NEXT: EMIT vp<%5> = EXPAND SCEV (ptrtoint ptr %b to i64)
19+
; CHECK-NEXT: Successor(s): vector.ph
20+
; CHECK-EMPTY:
21+
; CHECK-NEXT: vector.ph:
22+
; CHECK-NEXT: EMIT vp<%6> = ALIAS-LANE-MASK vp<%5>, vp<%4> (write-after-read)
23+
; CHECK-NEXT: EMIT vp<%index.part.next> = VF * Part + ir<0>
24+
; CHECK-NEXT: EMIT vp<%active.lane.mask.entry> = active lane mask vp<%index.part.next>, vp<%3>
25+
; CHECK-NEXT: Successor(s): vector loop
26+
; CHECK-EMPTY:
27+
; CHECK-NEXT: <x1> vector loop: {
28+
; CHECK-NEXT: vector.body:
29+
; CHECK-NEXT: EMIT vp<%7> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
30+
; CHECK-NEXT: ACTIVE-LANE-MASK-PHI vp<%8> = phi vp<%active.lane.mask.entry>, vp<%active.lane.mask.next>
31+
; CHECK-NEXT: EMIT vp<%9> = and vp<%8>, vp<%6>
32+
; CHECK-NEXT: Successor(s): pred.store
33+
; CHECK-EMPTY:
34+
; CHECK-NEXT: <xVFxUF> pred.store: {
35+
; CHECK-NEXT: pred.store.entry:
36+
; CHECK-NEXT: BRANCH-ON-MASK vp<%9>
37+
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
38+
; CHECK-EMPTY:
39+
; CHECK-NEXT: pred.store.if:
40+
; CHECK-NEXT: vp<%10> = SCALAR-STEPS vp<%7>, ir<1>, vp<%0>
41+
; CHECK-NEXT: REPLICATE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<%10>
42+
; CHECK-NEXT: REPLICATE ir<%0> = load ir<%arrayidx>
43+
; CHECK-NEXT: REPLICATE ir<%arrayidx2> = getelementptr inbounds ir<%b>, vp<%10>
44+
; CHECK-NEXT: REPLICATE ir<%1> = load ir<%arrayidx2>
45+
; CHECK-NEXT: REPLICATE ir<%arrayidx6> = getelementptr inbounds ir<%c>, vp<%10>
46+
; CHECK-NEXT: REPLICATE ir<%add> = add ir<%1>, ir<%0>
47+
; CHECK-NEXT: REPLICATE store ir<%add>, ir<%arrayidx6>
48+
; CHECK-NEXT: Successor(s): pred.store.continue
49+
; CHECK-EMPTY:
50+
; CHECK-NEXT: pred.store.continue:
51+
; CHECK-NEXT: No successors
52+
; CHECK-NEXT: }
53+
; CHECK-NEXT: Successor(s): for.body.2
54+
; CHECK-EMPTY:
55+
; CHECK-NEXT: for.body.2:
56+
; CHECK-NEXT: EMIT vp<%popcount> = popcount vp<%6>
57+
; CHECK-NEXT: EMIT vp<%index.next> = add vp<%7>, vp<%popcount>
58+
; CHECK-NEXT: EMIT vp<%11> = VF * Part + vp<%index.next>
59+
; CHECK-NEXT: EMIT vp<%active.lane.mask.next> = active lane mask vp<%11>, vp<%3>
60+
; CHECK-NEXT: EMIT vp<%12> = not vp<%active.lane.mask.next>
61+
; CHECK-NEXT: EMIT branch-on-cond vp<%12>
62+
; CHECK-NEXT: No successors
63+
; CHECK-NEXT: }
64+
; CHECK-NEXT: Successor(s): middle.block
65+
; CHECK-EMPTY:
66+
; CHECK-NEXT: middle.block:
67+
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>
68+
; CHECK-EMPTY:
69+
; CHECK-NEXT: ir-bb<for.cond.cleanup.loopexit>:
70+
; CHECK-NEXT: No successors
71+
; CHECK-NEXT: }
72+
entry:
73+
%cmp11 = icmp sgt i32 %n, 0
74+
br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup
75+
76+
for.body.preheader: ; preds = %entry
77+
%wide.trip.count = zext nneg i32 %n to i64
78+
br label %for.body
79+
80+
for.cond.cleanup.loopexit: ; preds = %for.body
81+
br label %for.cond.cleanup
82+
83+
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
84+
ret void
85+
86+
for.body: ; preds = %for.body.preheader, %for.body
87+
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
88+
%arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
89+
%0 = load i8, ptr %arrayidx, align 1
90+
%arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
91+
%1 = load i8, ptr %arrayidx2, align 1
92+
%add = add i8 %1, %0
93+
%arrayidx6 = getelementptr inbounds i8, ptr %c, i64 %indvars.iv
94+
store i8 %add, ptr %arrayidx6, align 1
95+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
96+
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
97+
br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
98+
}

llvm/test/Transforms/LoopVectorize/vplan-printing.ll

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,35 +1058,6 @@ exit:
10581058
ret void
10591059
}
10601060

1061-
define dso_local void @alias_mask(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
1062-
entry:
1063-
%cmp11 = icmp sgt i32 %n, 0
1064-
br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup
1065-
1066-
for.body.preheader: ; preds = %entry
1067-
%wide.trip.count = zext nneg i32 %n to i64
1068-
br label %for.body
1069-
1070-
for.cond.cleanup.loopexit: ; preds = %for.body
1071-
br label %for.cond.cleanup
1072-
1073-
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
1074-
ret void
1075-
1076-
for.body: ; preds = %for.body.preheader, %for.body
1077-
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
1078-
%arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
1079-
%0 = load i8, ptr %arrayidx, align 1
1080-
%arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
1081-
%1 = load i8, ptr %arrayidx2, align 1
1082-
%add = add i8 %1, %0
1083-
%arrayidx6 = getelementptr inbounds i8, ptr %c, i64 %indvars.iv
1084-
store i8 %add, ptr %arrayidx6, align 1
1085-
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1086-
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
1087-
br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
1088-
}
1089-
10901061
!llvm.dbg.cu = !{!0}
10911062
!llvm.module.flags = !{!3, !4}
10921063

0 commit comments

Comments
 (0)