Skip to content

Commit 706b681

Browse files
committed
!fixup address latest comments, thanks!
1 parent 1222e23 commit 706b681

File tree

5 files changed

+77
-104
lines changed

5 files changed

+77
-104
lines changed

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,8 +1054,8 @@ void VPlan::execute(VPTransformState *State) {
10541054
if (isa<VPWidenPHIRecipe>(&R))
10551055
continue;
10561056

1057-
if (isa<VPWidenPointerInductionRecipe>(&R) ||
1058-
isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1057+
if (isa<VPWidenPointerInductionRecipe, VPWidenIntOrFpInductionRecipe>(
1058+
&R)) {
10591059
PHINode *Phi = nullptr;
10601060
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
10611061
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
@@ -1085,10 +1085,10 @@ void VPlan::execute(VPTransformState *State) {
10851085
// previous iteration. For non-ordered reductions all UF parts are
10861086
// generated.
10871087
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1088+
auto *RedPhiR = dyn_cast<VPReductionPHIRecipe>(PhiR);
10881089
bool NeedsScalar =
10891090
isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1090-
(isa<VPReductionPHIRecipe>(PhiR) &&
1091-
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1091+
(RedPhiR && RedPhiR->isInLoop());
10921092
Value *Phi = State->get(PhiR, NeedsScalar);
10931093
Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
10941094
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -693,9 +693,14 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
693693
SmallVector<VPValue *> PossiblyDead(Term->operands());
694694
Term->eraseFromParent();
695695
auto *Header = cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getEntry());
696-
if (all_of(Header->phis(), [](VPRecipeBase &R) {
697-
return !isa<VPWidenIntOrFpInductionRecipe, VPReductionPHIRecipe>(&R);
698-
})) {
696+
if (any_of(Header->phis(),
697+
IsaPred<VPWidenIntOrFpInductionRecipe, VPReductionPHIRecipe>)) {
698+
LLVMContext &Ctx = SE.getContext();
699+
auto *BOC = new VPInstruction(
700+
VPInstruction::BranchOnCond,
701+
{Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))}, Term->getDebugLoc());
702+
ExitingVPBB->appendRecipe(BOC);
703+
} else {
699704
for (VPRecipeBase &R : make_early_inc_range(Header->phis())) {
700705
auto *P = cast<VPHeaderPHIRecipe>(&R);
701706
P->replaceAllUsesWith(P->getStartValue());
@@ -720,13 +725,6 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
720725
// be deleted when the region is deleted.
721726
LoopRegion->clearEntry();
722727
delete LoopRegion;
723-
} else {
724-
LLVMContext &Ctx = SE.getContext();
725-
auto *BOC =
726-
new VPInstruction(VPInstruction::BranchOnCond,
727-
{Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))}, Term->getDebugLoc());
728-
729-
ExitingVPBB->appendRecipe(BOC);
730728
}
731729
for (VPValue *Op : PossiblyDead)
732730
recursivelyDeleteDeadRecipes(Op);

llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll

Lines changed: 30 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -56,34 +56,30 @@ define void @trip3_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
5656
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
5757
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
5858
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 3)
59-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 0
60-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
61-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP6]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i8> poison)
62-
; CHECK-NEXT: [[TMP7:%.*]] = shl <vscale x 2 x i8> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
6359
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0
6460
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
6561
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP9]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i8> poison)
6662
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 2 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
67-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP7]]
63+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 0
6864
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
6965
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP12]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i8> poison)
7066
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 2 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
71-
; CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP13]], ptr [[TMP12]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
72-
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
73-
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
67+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
68+
; CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
69+
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
7470
; CHECK: middle.block:
7571
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
7672
; CHECK: scalar.ph:
7773
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
7874
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
7975
; CHECK: for.body:
8076
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
81-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]]
82-
; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
83-
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP12]], 1
84-
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
85-
; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
86-
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP13]]
77+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
78+
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
79+
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP15]], 1
80+
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1]], i64 [[I_08]]
81+
; CHECK-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
82+
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]]
8783
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
8884
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
8985
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 3
@@ -125,34 +121,30 @@ define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
125121
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
126122
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
127123
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 5)
128-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 0
129-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
130-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP6]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
131-
; CHECK-NEXT: [[TMP7:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i8> insertelement (<vscale x 4 x i8> poison, i8 1, i64 0), <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer)
132124
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0
133125
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
134126
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP9]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
135127
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
136-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP7]]
128+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 0
137129
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
138130
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
139131
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
140-
; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
141-
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
142-
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
132+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
133+
; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
134+
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
143135
; CHECK: middle.block:
144136
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
145137
; CHECK: scalar.ph:
146138
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
147139
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
148140
; CHECK: for.body:
149141
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
150-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]]
151-
; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
152-
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP12]], 1
153-
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
154-
; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
155-
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP13]]
142+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
143+
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
144+
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP15]], 1
145+
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1]], i64 [[I_08]]
146+
; CHECK-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
147+
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]]
156148
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
157149
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
158150
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 5
@@ -194,34 +186,30 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
194186
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
195187
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
196188
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 8)
197-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 0
198-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
199-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP6]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
200-
; CHECK-NEXT: [[TMP7:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i8> insertelement (<vscale x 4 x i8> poison, i8 1, i64 0), <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer)
201189
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0
202190
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
203191
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP9]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
204192
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
205-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP7]]
193+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 0
206194
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
207195
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
208196
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
209-
; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
210-
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
211-
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
197+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
198+
; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
199+
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
212200
; CHECK: middle.block:
213201
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
214202
; CHECK: scalar.ph:
215203
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
216204
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
217205
; CHECK: for.body:
218206
; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
219-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]]
220-
; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
221-
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP12]], 1
222-
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
223-
; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
224-
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP13]]
207+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
208+
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
209+
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP15]], 1
210+
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1]], i64 [[I_08]]
211+
; CHECK-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
212+
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]]
225213
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
226214
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
227215
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 8

llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,26 +12,22 @@ define void @small_trip_count_min_vlen_128(ptr nocapture %a) nounwind vscale_ran
1212
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP0]]
1313
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
1414
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
15-
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
16-
; CHECK: vector.body:
17-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
18-
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
19-
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 [[TMP3]], i32 4)
20-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP3]]
21-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
22-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 1 x i32> @llvm.masked.load.nxv1i32.p0(ptr [[TMP5]], i32 4, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i32> poison)
15+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 0, i32 4)
16+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4:%.*]], i32 0
17+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
18+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 1 x i32> @llvm.masked.load.nxv1i32.p0(ptr [[TMP7]], i32 4, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i32> poison)
2319
; CHECK-NEXT: [[TMP6:%.*]] = add nsw <vscale x 1 x i32> [[WIDE_MASKED_LOAD]], splat (i32 1)
24-
; CHECK-NEXT: call void @llvm.masked.store.nxv1i32.p0(<vscale x 1 x i32> [[TMP6]], ptr [[TMP5]], i32 4, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])
25-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP2]]
26-
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
20+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
21+
; CHECK-NEXT: call void @llvm.masked.store.nxv1i32.p0(<vscale x 1 x i32> [[TMP6]], ptr [[TMP8]], i32 4, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])
22+
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
2723
; CHECK: middle.block:
2824
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
2925
; CHECK: scalar.ph:
3026
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
3127
; CHECK-NEXT: br label [[LOOP:%.*]]
3228
; CHECK: loop:
3329
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
34-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV]]
30+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[IV]]
3531
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[GEP]], align 4
3632
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[V]], 1
3733
; CHECK-NEXT: store i32 [[ADD]], ptr [[GEP]], align 4
@@ -71,26 +67,22 @@ define void @small_trip_count_min_vlen_32(ptr nocapture %a) nounwind vscale_rang
7167
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
7268
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
7369
; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], 4
74-
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
75-
; CHECK: vector.body:
76-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
77-
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0
78-
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 [[TMP5]], i32 4)
79-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP5]]
80-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
81-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP7]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
70+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
71+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6:%.*]], i32 0
72+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
73+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP9]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
8274
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 1)
83-
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP8]], ptr [[TMP7]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
84-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP4]]
85-
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
75+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
76+
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP8]], ptr [[TMP10]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
77+
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
8678
; CHECK: middle.block:
8779
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
8880
; CHECK: scalar.ph:
8981
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
9082
; CHECK-NEXT: br label [[LOOP:%.*]]
9183
; CHECK: loop:
9284
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
93-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV]]
85+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[IV]]
9486
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[GEP]], align 4
9587
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[V]], 1
9688
; CHECK-NEXT: store i32 [[ADD]], ptr [[GEP]], align 4

0 commit comments

Comments
 (0)