Skip to content

Commit bbe4f92

Browse files
committed
[VPlan] Use VPlan operand order for VPBlendRecipes.
Remove use of IR references and the order of incoming values of IR phis when creating VPBlendRecipes. Instead, simply use the incoming operands and blocks from the VPWidenPHIRecipe. Note that this changes the order of the incoming operands/masks for some blends.
1 parent 7500cea commit bbe4f92

17 files changed

+92
-99
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8538,36 +8538,30 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
85388538
return nullptr;
85398539
}
85408540

8541-
VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
8542-
ArrayRef<VPValue *> Operands) {
8543-
unsigned NumIncoming = Phi->getNumIncomingValues();
8541+
VPBlendRecipe *VPRecipeBuilder::tryToBlend(VPWidenPHIRecipe *PhiR) {
8542+
unsigned NumIncoming = PhiR->getNumIncoming();
85448543

85458544
// We know that all PHIs in non-header blocks are converted into selects, so
85468545
// we don't have to worry about the insertion order and we can just use the
85478546
// builder. At this point we generate the predication tree. There may be
85488547
// duplications since this is a simple recursive scan, but future
85498548
// optimizations will clean it up.
85508549

8551-
// Map incoming IR BasicBlocks to incoming VPValues, for lookup below.
8552-
// TODO: Add operands and masks in order from the VPlan predecessors.
8553-
DenseMap<BasicBlock *, VPValue *> VPIncomingValues;
8554-
for (const auto &[Idx, Pred] : enumerate(predecessors(Phi->getParent())))
8555-
VPIncomingValues[Pred] = Operands[Idx];
8556-
85578550
SmallVector<VPValue *, 2> OperandsWithMask;
85588551
for (unsigned In = 0; In < NumIncoming; In++) {
8559-
BasicBlock *Pred = Phi->getIncomingBlock(In);
8560-
OperandsWithMask.push_back(VPIncomingValues.lookup(Pred));
8561-
VPValue *EdgeMask = getEdgeMask(Pred, Phi->getParent());
8552+
const VPBasicBlock *Pred = PhiR->getIncomingBlock(In);
8553+
OperandsWithMask.push_back(PhiR->getIncomingValue(In));
8554+
VPValue *EdgeMask = getEdgeMask(Pred, PhiR->getParent());
85628555
if (!EdgeMask) {
85638556
assert(In == 0 && "Both null and non-null edge masks found");
8564-
assert(all_equal(Operands) &&
8557+
assert(all_equal(PhiR->operands()) &&
85658558
"Distinct incoming values with one having a full mask");
85668559
break;
85678560
}
85688561
OperandsWithMask.push_back(EdgeMask);
85698562
}
8570-
return new VPBlendRecipe(Phi, OperandsWithMask);
8563+
return new VPBlendRecipe(cast<PHINode>(PhiR->getUnderlyingInstr()),
8564+
OperandsWithMask);
85718565
}
85728566

85738567
VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
@@ -8954,15 +8948,18 @@ bool VPRecipeBuilder::getScaledReductions(
89548948
return false;
89558949
}
89568950

8957-
VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
8958-
Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range) {
8951+
VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
8952+
VFRange &Range) {
89598953
// First, check for specific widening recipes that deal with inductions, Phi
89608954
// nodes, calls and memory operations.
89618955
VPRecipeBase *Recipe;
8962-
if (auto *Phi = dyn_cast<PHINode>(Instr)) {
8963-
if (Phi->getParent() != OrigLoop->getHeader())
8964-
return tryToBlend(Phi, Operands);
8956+
Instruction *Instr = R->getUnderlyingInstr();
8957+
SmallVector<VPValue *, 4> Operands(R->operands());
8958+
if (auto *PhiR = dyn_cast<VPWidenPHIRecipe>(R)) {
8959+
if (PhiR->getParent()->getNumPredecessors() != 0)
8960+
return tryToBlend(PhiR);
89658961

8962+
auto *Phi = cast<PHINode>(R->getUnderlyingInstr());
89668963
assert(Operands.size() == 2 && "Must have 2 operands for header phis");
89678964
if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range)))
89688965
return Recipe;
@@ -9526,11 +9523,12 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
95269523
continue;
95279524
}
95289525

9529-
SmallVector<VPValue *, 4> Operands(R.operands());
95309526
VPRecipeBase *Recipe =
9531-
RecipeBuilder.tryToCreateWidenRecipe(Instr, Operands, Range);
9532-
if (!Recipe)
9527+
RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range);
9528+
if (!Recipe) {
9529+
SmallVector<VPValue *, 4> Operands(R.operands());
95339530
Recipe = RecipeBuilder.handleReplication(Instr, Operands, Range);
9531+
}
95349532

95359533
RecipeBuilder.setRecipe(Instr, Recipe);
95369534
if (isa<VPWidenIntOrFpInductionRecipe>(Recipe) && isa<TruncInst>(Instr)) {

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ class VPRecipeBuilder {
125125
/// Handle non-loop phi nodes. Return a new VPBlendRecipe otherwise. Currently
126126
/// all such phi nodes are turned into a sequence of select instructions as
127127
/// the vectorizer currently performs full if-conversion.
128-
VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands);
128+
VPBlendRecipe *tryToBlend(VPWidenPHIRecipe *PhiR);
129129

130130
/// Handle call instructions. If \p CI can be widened for \p Range.Start,
131131
/// return a new VPWidenCallRecipe or VPWidenIntrinsicRecipe. Range.End may be
@@ -179,11 +179,9 @@ class VPRecipeBuilder {
179179
/// that are valid so recipes can be formed later.
180180
void collectScaledReductions(VFRange &Range);
181181

182-
/// Create and return a widened recipe for \p I if one can be created within
182+
/// Create and return a widened recipe for \p R if one can be created within
183183
/// the given VF \p Range.
184-
VPRecipeBase *tryToCreateWidenRecipe(Instruction *Instr,
185-
ArrayRef<VPValue *> Operands,
186-
VFRange &Range);
184+
VPRecipeBase *tryToCreateWidenRecipe(VPSingleDefRecipe *R, VFRange &Range);
187185

188186
/// Create and return a partial reduction recipe for a reduction instruction
189187
/// along with binary operation and reduction phi operands.

llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ define void @test_blend_feeding_replicated_store_1(i64 %N, ptr noalias %src, ptr
2828
; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i1> zeroinitializer, <16 x i1> zeroinitializer
2929
; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i1> [[TMP6]], splat (i1 true)
3030
; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP7]], [[TMP8]]
31-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP7]], <16 x ptr> [[BROADCAST_SPLAT]], <16 x ptr> zeroinitializer
31+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP6]], <16 x ptr> [[BROADCAST_SPLAT]], <16 x ptr> zeroinitializer
3232
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i1> [[TMP9]], i32 0
3333
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
3434
; CHECK: [[PRED_STORE_IF]]:
@@ -219,7 +219,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
219219
; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i1> [[TMP3]], splat (i1 true)
220220
; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP4]], <16 x i1> [[TMP5]], <16 x i1> zeroinitializer
221221
; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], [[TMP3]]
222-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> zeroinitializer, <16 x i8> splat (i8 1)
222+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP3]], <16 x i8> splat (i8 1), <16 x i8> zeroinitializer
223223
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP7]], i32 0
224224
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
225225
; CHECK: [[PRED_STORE_IF]]:

llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
354354
; TFCOMMON-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP8]])
355355
; TFCOMMON-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i1> zeroinitializer
356356
; TFCOMMON-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP10]])
357-
; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP8]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i64> [[TMP11]]
357+
; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP10]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[TMP9]]
358358
; TFCOMMON-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
359359
; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP12]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
360360
; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
@@ -397,8 +397,8 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
397397
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i1> zeroinitializer
398398
; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP19]])
399399
; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], <vscale x 2 x i1> [[TMP20]])
400-
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP15]], <vscale x 2 x i64> [[TMP17]], <vscale x 2 x i64> [[TMP21]]
401-
; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP16]], <vscale x 2 x i64> [[TMP18]], <vscale x 2 x i64> [[TMP22]]
400+
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP19]], <vscale x 2 x i64> [[TMP21]], <vscale x 2 x i64> [[TMP17]]
401+
; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP20]], <vscale x 2 x i64> [[TMP22]], <vscale x 2 x i64> [[TMP18]]
402402
; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
403403
; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
404404
; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 2

llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64
4646
; CHECK-NEXT: [[TMP20:%.*]] = xor <vscale x 8 x i1> [[TMP14]], splat (i1 true)
4747
; CHECK-NEXT: [[TMP21:%.*]] = select <vscale x 8 x i1> [[TMP13]], <vscale x 8 x i1> [[TMP20]], <vscale x 8 x i1> zeroinitializer
4848
; CHECK-NEXT: [[TMP22:%.*]] = or <vscale x 8 x i1> [[TMP19]], [[TMP21]]
49-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <vscale x 8 x i1> [[TMP19]], i32 0
50-
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[EXT]], i64 [[INDEX]], i64 poison
49+
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <vscale x 8 x i1> [[TMP21]], i32 0
50+
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP23]], i64 poison, i64 [[INDEX]]
5151
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[PREDPHI]]
5252
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i16, ptr [[TMP24]], i32 0
5353
; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> zeroinitializer, ptr [[TMP25]], i32 2, <vscale x 8 x i1> [[TMP22]])

llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ define void @test(ptr %p, i64 %a, i8 %b) {
2020
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
2121
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ]
2222
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 9)
23-
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <16 x i32> [[VEC_IND]], splat (i32 2)
23+
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <16 x i32> [[VEC_IND]], splat (i32 2)
2424
; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i1> [[TMP4]], <16 x i1> zeroinitializer
25-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP3]], <16 x i32> [[TMP2]]
25+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]]
2626
; CHECK-NEXT: [[TMP6:%.*]] = shl <16 x i32> [[PREDPHI]], splat (i32 8)
2727
; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8>
2828
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 0

llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@ define i32 @foo(ptr nocapture %A, ptr nocapture %B, i32 %n) {
3333
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
3434
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META3]]
3535
; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]]
36-
; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], splat (i32 20)
36+
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 19)
3737
; CHECK-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], splat (i32 4)
3838
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> splat (i32 4), <4 x i32> splat (i32 5)
39-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP10]], <4 x i32> splat (i32 3)
39+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> splat (i32 3), <4 x i32> [[TMP10]]
4040
; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[PREDPHI]], <4 x i32> splat (i32 9)
4141
; CHECK-NEXT: store <4 x i32> [[PREDPHI3]], ptr [[TMP5]], align 4, !alias.scope [[META0]], !noalias [[META3]]
4242
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -141,16 +141,14 @@ define i32 @multi_variable_if_nest(ptr nocapture %A, ptr nocapture %B, i32 %n) {
141141
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META12]]
142142
; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]]
143143
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 19)
144-
; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
145-
; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP7]], [[TMP9]]
146144
; CHECK-NEXT: [[TMP11:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], splat (i32 4)
147145
; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> splat (i32 4), <4 x i32> splat (i32 5)
148146
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> splat (i32 6), <4 x i32> splat (i32 11)
149147
; CHECK-NEXT: [[TMP14:%.*]] = and <4 x i1> [[TMP7]], [[TMP8]]
150-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> splat (i32 3), <4 x i32> splat (i32 9)
151-
; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP12]], <4 x i32> [[PREDPHI]]
152-
; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> splat (i32 7), <4 x i32> splat (i32 18)
153-
; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP13]], <4 x i32> [[PREDPHI4]]
148+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> splat (i32 3), <4 x i32> [[TMP12]]
149+
; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[PREDPHI]], <4 x i32> splat (i32 9)
150+
; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> splat (i32 7), <4 x i32> [[TMP13]]
151+
; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[PREDPHI4]], <4 x i32> splat (i32 18)
154152
; CHECK-NEXT: store <4 x i32> [[PREDPHI3]], ptr [[TMP5]], align 4, !alias.scope [[META9]], !noalias [[META12]]
155153
; CHECK-NEXT: store <4 x i32> [[PREDPHI5]], ptr [[TMP6]], align 4, !alias.scope [[META12]]
156154
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4

llvm/test/Transforms/LoopVectorize/if-reduction.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,11 +1207,11 @@ define float @fcmp_multi(ptr nocapture readonly %a, i32 %n) nounwind readonly {
12071207
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
12081208
; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
12091209
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
1210-
; CHECK-NEXT: [[TMP6:%.*]] = fcmp uge <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
1211-
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP6]], <4 x i1> zeroinitializer
1210+
; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
12121211
; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
1212+
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
12131213
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
1214-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP9]]
1214+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x float> [[TMP9]], <4 x float> [[TMP8]]
12151215
; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[WIDE_LOAD]], <4 x float> [[PREDPHI]]
12161216
; CHECK-NEXT: [[TMP10]] = fadd fast <4 x float> [[PREDPHI1]], [[VEC_PHI]]
12171217
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -1335,8 +1335,8 @@ define float @fcmp_fadd_fsub(ptr nocapture readonly %a, i32 %n) nounwind readonl
13351335
; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
13361336
; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
13371337
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
1338-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP7]], <4 x float> [[TMP6]]
1339-
; CHECK-NEXT: [[PREDPHI1]] = select <4 x i1> [[TMP9]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
1338+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP9]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP6]]
1339+
; CHECK-NEXT: [[PREDPHI1]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP7]], <4 x float> [[PREDPHI]]
13401340
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
13411341
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
13421342
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]

llvm/test/Transforms/LoopVectorize/no_outside_user.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -185,10 +185,10 @@ define i32 @test3(i32 %N) {
185185
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
186186
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
187187
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
188-
; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
188+
; CHECK-NEXT: [[TMP5:%.*]] = icmp sle <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
189189
; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP5]], <2 x i1> zeroinitializer
190-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
191-
; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> splat (i32 2), <2 x i32> [[PREDPHI]]
190+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 2)
191+
; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> [[PREDPHI]]
192192
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
193193
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
194194
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]

llvm/test/Transforms/LoopVectorize/phi-cost.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,10 +102,10 @@ define void @phi_three_incoming_values(ptr noalias %a, ptr noalias %b, i64 %n) {
102102
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
103103
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4
104104
; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]]
105-
; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], splat (i32 20)
105+
; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], splat (i32 19)
106106
; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD2]], splat (i32 4)
107107
; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> splat (i32 4), <2 x i32> splat (i32 5)
108-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> [[TMP8]], <2 x i32> splat (i32 3)
108+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> splat (i32 3), <2 x i32> [[TMP8]]
109109
; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[PREDPHI]], <2 x i32> splat (i32 9)
110110
; CHECK-NEXT: store <2 x i32> [[PREDPHI3]], ptr [[TMP3]], align 4
111111
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2

0 commit comments

Comments
 (0)