Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -2297,10 +2297,6 @@ class VPBlendRecipe : public VPSingleDefRecipe {
/// Generate the phi/select nodes.
void execute(VPTransformState &State) override;

/// Return the cost of this VPWidenMemoryRecipe.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
Expand Down
61 changes: 14 additions & 47 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -909,6 +909,19 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
return Ctx.TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy,
Ctx.CostKind);
}
case Instruction::Select: {
if (!getUnderlyingValue())
return 0;
// Handle cases where only the first lane is used the same way as the legacy
// cost model.
if (vputils::onlyFirstLaneUsed(this))
return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
Type *ResTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResTy, CmpTy,
CmpInst::BAD_ICMP_PREDICATE,
Ctx.CostKind);
}
case VPInstruction::AnyOf: {
auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
return Ctx.TTI.getArithmeticReductionCost(
Expand Down Expand Up @@ -2380,53 +2393,7 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
#endif

void VPBlendRecipe::execute(VPTransformState &State) {
assert(isNormalized() && "Expected blend to be normalized!");
// We know that all PHIs in non-header blocks are converted into
// selects, so we don't have to worry about the insertion order and we
// can just use the builder.
// At this point we generate the predication tree. There may be
// duplications since this is a simple recursive scan, but future
// optimizations will clean it up.

unsigned NumIncoming = getNumIncomingValues();

// Generate a sequence of selects of the form:
// SELECT(Mask3, In3,
// SELECT(Mask2, In2,
// SELECT(Mask1, In1,
// In0)))
// Note that Mask0 is never used: lanes for which no path reaches this phi and
// are essentially undef are taken from In0.
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
Value *Result = nullptr;
for (unsigned In = 0; In < NumIncoming; ++In) {
// We might have single edge PHIs (blocks) - use an identity
// 'select' for the first PHI operand.
Value *In0 = State.get(getIncomingValue(In), OnlyFirstLaneUsed);
if (In == 0)
Result = In0; // Initialize with the first incoming value.
else {
// Select between the current value and the previous incoming edge
// based on the incoming mask.
Value *Cond = State.get(getMask(In), OnlyFirstLaneUsed);
Result = State.Builder.CreateSelect(Cond, In0, Result, "predphi");
}
}
State.set(this, Result, OnlyFirstLaneUsed);
}

InstructionCost VPBlendRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
// Handle cases where only the first lane is used the same way as the legacy
// cost model.
if (vputils::onlyFirstLaneUsed(this))
return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);

Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
return (getNumIncomingValues() - 1) *
Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
CmpInst::BAD_ICMP_PREDICATE, Ctx.CostKind);
llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand Down
44 changes: 16 additions & 28 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1082,6 +1082,15 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X))))
return Def->replaceAllUsesWith(X);

// select !c, x, y -> select c, y, x
VPValue *C;
if (match(Def, m_Select(m_Not(m_VPValue(C)), m_VPValue(X), m_VPValue(Y)))) {
Def->setOperand(0, C);
Def->setOperand(1, Y);
Def->setOperand(2, X);
return;
}

if (match(Def, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
return Def->replaceAllUsesWith(A);

Expand Down Expand Up @@ -1288,38 +1297,17 @@ static void simplifyBlends(VPlan &Plan) {
}
}

SmallVector<VPValue *, 4> OperandsWithMask;
OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));

VPBuilder Builder(&R);
VPValue *Select = Blend->getIncomingValue(StartIndex);
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
if (I == StartIndex)
continue;
OperandsWithMask.push_back(Blend->getIncomingValue(I));
OperandsWithMask.push_back(Blend->getMask(I));
}

auto *NewBlend = new VPBlendRecipe(
cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
NewBlend->insertBefore(&R);

VPValue *DeadMask = Blend->getMask(StartIndex);
Blend->replaceAllUsesWith(NewBlend);
Blend->eraseFromParent();
recursivelyDeleteDeadRecipes(DeadMask);

/// Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
VPValue *NewMask;
if (NewBlend->getNumOperands() == 3 &&
match(NewBlend->getMask(1), m_Not(m_VPValue(NewMask)))) {
VPValue *Inc0 = NewBlend->getOperand(0);
VPValue *Inc1 = NewBlend->getOperand(1);
VPValue *OldMask = NewBlend->getOperand(2);
NewBlend->setOperand(0, Inc1);
NewBlend->setOperand(1, Inc0);
NewBlend->setOperand(2, NewMask);
if (OldMask->getNumUsers() == 0)
cast<VPInstruction>(OldMask)->eraseFromParent();
Select =
Builder.createSelect(Blend->getMask(I), Blend->getIncomingValue(I),
Select, R.getDebugLoc(), "predphi");
Select->setUnderlyingValue(Blend->getUnderlyingValue());
}
Blend->replaceAllUsesWith(Select);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFNONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP7]], i64 0
; TFNONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
; TFNONE-NEXT: [[TMP2:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
; TFNONE-NEXT: [[TMP3:%.*]] = fcmp ogt <2 x double> [[TMP2]], zeroinitializer
; TFNONE-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00)
; TFNONE-NEXT: [[TMP3:%.*]] = fcmp ule <2 x double> [[TMP2]], zeroinitializer
; TFNONE-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
; TFNONE-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1
; TFNONE-NEXT: store double [[TMP14]], ptr [[P:%.*]], align 8
; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -935,8 +935,8 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFNONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[TMP7]], i64 0
; TFNONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
; TFNONE-NEXT: [[TMP8:%.*]] = call <vscale x 2 x double> @exp_masked_scalable(<vscale x 2 x double> [[BROADCAST_SPLAT]], <vscale x 2 x i1> splat (i1 true))
; TFNONE-NEXT: [[TMP9:%.*]] = fcmp ogt <vscale x 2 x double> [[TMP8]], zeroinitializer
; TFNONE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP9]], <vscale x 2 x double> zeroinitializer, <vscale x 2 x double> splat (double 1.000000e+00)
; TFNONE-NEXT: [[TMP9:%.*]] = fcmp ule <vscale x 2 x double> [[TMP8]], zeroinitializer
; TFNONE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP9]], <vscale x 2 x double> splat (double 1.000000e+00), <vscale x 2 x double> zeroinitializer
; TFNONE-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
; TFNONE-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 2
; TFNONE-NEXT: [[TMP13:%.*]] = sub i32 [[TMP12]], 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -597,8 +597,8 @@ define void @empty_block_with_phi_1(ptr %src, i64 %N) #0 {
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP10]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i16>, ptr [[TMP11]], align 2
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <vscale x 8 x i16> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 8 x i1> [[TMP12]], <vscale x 8 x i16> splat (i16 99), <vscale x 8 x i16> [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <vscale x 8 x i16> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 8 x i1> [[TMP8]], <vscale x 8 x i16> [[WIDE_LOAD]], <vscale x 8 x i16> splat (i16 99)
; CHECK-NEXT: store <vscale x 8 x i16> [[PREDPHI]], ptr [[TMP11]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP9]], [[TMP5]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
Original file line number Diff line number Diff line change
Expand Up @@ -677,9 +677,9 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) {
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <vscale x 2 x i64> [[WIDE_LOAD]], splat (i64 42)
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <vscale x 2 x i64> [[WIDE_LOAD]], splat (i64 42)
; CHECK-NEXT: [[TMP10:%.*]] = udiv <vscale x 2 x i64> [[WIDE_LOAD]], splat (i64 27)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP9]], <vscale x 2 x i64> [[TMP10]], <vscale x 2 x i64> [[WIDE_LOAD]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP9]], <vscale x 2 x i64> [[WIDE_LOAD]], <vscale x 2 x i64> [[TMP10]]
; CHECK-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP8]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
Expand Down Expand Up @@ -720,12 +720,12 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) {
; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4
; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
; FIXED-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], splat (i64 42)
; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], splat (i64 42)
; FIXED-NEXT: [[TMP5:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 42)
; FIXED-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD1]], splat (i64 42)
; FIXED-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], splat (i64 27)
; FIXED-NEXT: [[TMP7:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], splat (i64 27)
; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[TMP6]], <4 x i64> [[WIDE_LOAD]]
; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD1]]
; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> [[WIDE_LOAD]], <4 x i64> [[TMP6]]
; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[WIDE_LOAD1]], <4 x i64> [[TMP7]]
; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP2]], align 8
; FIXED-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP3]], align 8
; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
Expand Down Expand Up @@ -797,9 +797,9 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) {
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <vscale x 2 x i64> [[WIDE_LOAD]], splat (i64 42)
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <vscale x 2 x i64> [[WIDE_LOAD]], splat (i64 42)
; CHECK-NEXT: [[TMP10:%.*]] = sdiv <vscale x 2 x i64> [[WIDE_LOAD]], splat (i64 27)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP9]], <vscale x 2 x i64> [[TMP10]], <vscale x 2 x i64> [[WIDE_LOAD]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP9]], <vscale x 2 x i64> [[WIDE_LOAD]], <vscale x 2 x i64> [[TMP10]]
; CHECK-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP8]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
Expand Down Expand Up @@ -840,12 +840,12 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) {
; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4
; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
; FIXED-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], splat (i64 42)
; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], splat (i64 42)
; FIXED-NEXT: [[TMP5:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 42)
; FIXED-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD1]], splat (i64 42)
; FIXED-NEXT: [[TMP6:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], splat (i64 27)
; FIXED-NEXT: [[TMP7:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], splat (i64 27)
; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[TMP6]], <4 x i64> [[WIDE_LOAD]]
; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD1]]
; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> [[WIDE_LOAD]], <4 x i64> [[TMP6]]
; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[WIDE_LOAD1]], <4 x i64> [[TMP7]]
; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP2]], align 8
; FIXED-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP3]], align 8
; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -398,9 +398,9 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; NO-VP-OUTLOOP-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
; NO-VP-OUTLOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP21]], align 4
; NO-VP-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 3)
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = icmp sle <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 3)
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD]]
; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP18]], <vscale x 4 x i32> [[TMP16]], <vscale x 4 x i32> [[VEC_PHI]]
; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP12]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP16]]
; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
; NO-VP-OUTLOOP-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-OUTLOOP-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
Expand Down Expand Up @@ -956,9 +956,9 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; NO-VP-OUTLOOP-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0
; NO-VP-OUTLOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP27]], align 4
; NO-VP-OUTLOOP-NEXT: [[TMP28:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_IND]]
; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = icmp sle <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_IND]]
; NO-VP-OUTLOOP-NEXT: [[TMP22:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD]]
; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP28]], <vscale x 4 x i32> [[TMP22]], <vscale x 4 x i32> [[VEC_PHI]]
; NO-VP-OUTLOOP-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP22]]
; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
; NO-VP-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
; NO-VP-OUTLOOP-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ define i32 @test_explicit_pred(i64 %len) {
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], splat (i64 4)
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <4 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP0:%.*]] = icmp sge <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp sge <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <4 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i32 4
Expand All @@ -47,10 +47,10 @@ define i32 @test_explicit_pred(i64 %len) {
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP14]], align 4
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i32>, ptr [[TMP15]], align 4
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[PREDPHI10:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[PREDPHI11:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[WIDE_LOAD8]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[PREDPHI12:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[WIDE_LOAD9]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> zeroinitializer, <4 x i32> [[WIDE_LOAD]]
; CHECK-NEXT: [[PREDPHI10:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> zeroinitializer, <4 x i32> [[WIDE_LOAD7]]
; CHECK-NEXT: [[PREDPHI11:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> zeroinitializer, <4 x i32> [[WIDE_LOAD8]]
; CHECK-NEXT: [[PREDPHI12:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> zeroinitializer, <4 x i32> [[WIDE_LOAD9]]
; CHECK-NEXT: [[TMP16]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
; CHECK-NEXT: [[TMP17]] = add <4 x i32> [[VEC_PHI4]], [[PREDPHI10]]
; CHECK-NEXT: [[TMP18]] = add <4 x i32> [[VEC_PHI5]], [[PREDPHI11]]
Expand Down
Loading