3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7192,7 +7192,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
// cost model is complete for better cost estimates.
Comment on lines 7192 to 7193
Contributor: Is this comment still relevant?
Collaborator: Yes, the comment indicates that these transforms still take place during VPlan execution rather than during planning. Perhaps worth rewording to clarify.

VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF);
VPlanTransforms::runPass(VPlanTransforms::materializeBuildVectors, BestVPlan);
VPlanTransforms::runPass(VPlanTransforms::materializePacksAndUnpacks,
BestVPlan);
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
bool HasBranchWeights =
17 changes: 17 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1012,6 +1012,11 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
/// Creates a fixed-width vector containing all operands. The number of
/// operands matches the vector element count.
BuildVector,
/// Extracts all lanes from its (non-scalable) vector operand. This is an
/// abstract VPInstruction whose single defined VPValue represents VF
/// scalars extracted from a vector, to be replaced by VF ExtractElement
/// VPInstructions.
Unpack,
/// Compute the final result of a AnyOf reduction with select(cmp(),x,y),
/// where one of (x,y) is loop invariant, and both x and y are integer type.
ComputeAnyOfResult,
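Note: conceptually, the single def of an Unpack stands for VF scalars; replicateByVF later expands it into one extract per lane. A minimal sketch, assuming Builder, Plan, IdxTy, the vector operand Vec, and a fixed VF are in scope (mirroring the helpers used in VPlanUnroll.cpp):

    // Expand one Unpack into VF ExtractElement VPInstructions, one per lane.
    SmallVector<VPValue *> LaneDefs;
    for (unsigned Lane = 0; Lane != VF; ++Lane) {
      VPValue *Idx = Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane));
      LaneDefs.push_back(
          Builder.createNaryOp(Instruction::ExtractElement, {Vec, Idx}));
    }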
@@ -2717,6 +2722,15 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
return R && classof(R);
}

static inline bool classof(const VPValue *VPV) {
const VPRecipeBase *R = VPV->getDefiningRecipe();
return R && classof(R);
}

static inline bool classof(const VPSingleDefRecipe *R) {
return classof(static_cast<const VPRecipeBase *>(R));
}

Comment on lines +2725 to +2733
Collaborator: OK, related to the introduction of unpack?
Contributor Author: It's needed for this change, due to the new uses of cast/dyn_cast/isa with VPSingleDefRecipe.
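Note: the overloads make LLVM's cast machinery usable directly on a VPValue (or VPSingleDefRecipe), resolving through the defining recipe. A hedged sketch; the helper name is illustrative only:

    // Returns true if VPV is defined by a VPReductionRecipe; resolution goes
    // through VPValue::getDefiningRecipe() via the new classof overload.
    static bool isReductionResult(const VPValue *VPV) {
      return isa<VPReductionRecipe>(VPV);
    }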

/// Generate the reduction in the loop.
void execute(VPTransformState &State) override;

@@ -3102,6 +3116,9 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
/// Returns true if this expression contains recipes that may have side
/// effects.
bool mayHaveSideEffects() const;

/// Returns true if the result of this VPExpressionRecipe is a single-scalar.
bool isSingleScalar() const;
};

/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -110,6 +110,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::AnyOf:
case VPInstruction::BuildStructVector:
case VPInstruction::BuildVector:
case VPInstruction::Unpack:
return SetResultTyFromOp();
case VPInstruction::ExtractLane:
return inferScalarType(R->getOperand(1));
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -368,6 +368,12 @@ m_ExtractLastElement(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::ExtractLastElement>(Op0);
}

template <typename Op0_t, typename Op1_t>
inline VPInstruction_match<Instruction::ExtractElement, Op0_t, Op1_t>
m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1) {
return m_VPInstruction<Instruction::ExtractElement>(Op0, Op1);
}
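Note: a hedged usage sketch of the new matcher, assuming a recipe R is in scope; m_VPValue and m_ConstantInt are existing VPlanPatternMatch helpers:

    // Recognize: extractelement <vector>, <constant lane index>.
    VPValue *Vec;
    uint64_t Lane;
    if (match(&R, m_ExtractElement(m_VPValue(Vec), m_ConstantInt(Lane)))) {
      // Vec is the vector operand, Lane the constant lane index.
    }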

template <typename Op0_t, typename Op1_t, typename Op2_t>
inline VPInstruction_match<VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t>
m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
15 changes: 14 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -514,6 +514,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::FirstActiveLane:
case VPInstruction::Not:
case VPInstruction::Unpack:
return 1;
case Instruction::ICmp:
case Instruction::FCmp:
@@ -1241,6 +1242,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::StepVector:
case VPInstruction::ReductionStartVector:
case VPInstruction::VScale:
case VPInstruction::Unpack:
return false;
default:
return true;
@@ -1285,7 +1287,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
case VPInstruction::WidePtrAdd:
return Op == getOperand(0);
// WidePtrAdd supports scalar and vector base addresses.
return false;
case VPInstruction::ComputeAnyOfResult:
case VPInstruction::ComputeFindIVResult:
return Op == getOperand(1);
@@ -1409,6 +1412,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ResumeForEpilogue:
O << "resume-for-epilogue";
break;
case VPInstruction::Unpack:
O << "unpack";
break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
@@ -2880,6 +2886,13 @@ bool VPExpressionRecipe::mayHaveSideEffects() const {
return false;
}

bool VPExpressionRecipe::isSingleScalar() const {
// Cannot use vputils::isSingleScalar(), because all external operands
// of the expression will be live-ins while bundled.
return isa<VPReductionRecipe>(ExpressionRecipes.back()) &&
!isa<VPPartialReductionRecipe>(ExpressionRecipes.back());
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent,
53 changes: 52 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1223,6 +1223,13 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
}

uint64_t Idx;
if (match(&R, m_ExtractElement(m_BuildVector(), m_ConstantInt(Idx)))) {
auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
Collaborator: Could m_BuildVector(Op0) provide the desired operand?
Contributor Author: Hmm, I think it could, but it would be inconsistent with other matchers, where the inner matchers match the operands.

Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
return;
}
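Note: in effect, extracting a constant lane from a just-built vector folds to the scalar inserted at that lane. Schematically (hedged, not actual VPlan syntax):

    // before: %v = buildvector %a, %b, %c, %d
    //         %x = extractelement %v, 2
    // after:  all uses of %x are replaced by %c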

if (auto *Phi = dyn_cast<VPPhi>(Def)) {
if (Phi->getNumOperands() == 1)
Phi->replaceAllUsesWith(Phi->getOperand(0));
@@ -3738,7 +3745,7 @@ void VPlanTransforms::materializeBackedgeTakenCount(VPlan &Plan,
BTC->replaceAllUsesWith(TCMO);
}

void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
void VPlanTransforms::materializePacksAndUnpacks(VPlan &Plan) {
if (Plan.hasScalarVFOnly())
return;

@@ -3787,6 +3794,50 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
});
}
}

// Create explicit VPInstructions to convert vectors to scalars. The current
// implementation is conservative - it may miss some cases that may or may not
// be vector values. TODO: introduce Unpacks speculatively - remove them later
// if they are known to operate on scalar values.
for (VPBasicBlock *VPBB : VPBBsInsideLoopRegion) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
if (isa<VPReplicateRecipe, VPInstruction, VPScalarIVStepsRecipe,
VPDerivedIVRecipe, VPCanonicalIVPHIRecipe>(&R))
continue;
for (VPValue *Def : R.definedValues()) {
// Skip recipes that are single-scalar or only have their first lane
// used.
// TODO: The Defs skipped here may or may not be vector values.
// Introduce Unpacks, and remove them later, if they are guaranteed to
// produce scalar values.
Comment on lines +3810 to +3812
Collaborator: Should this be placed above, next to "Create explicit...", to also capture skipping VPInstructions?

Suggested change:
  - // TODO: The Defs skipped here may or may not be vector values.
  - // Introduce Unpacks, and remove them later, if they are guaranteed to
  - // produce scalar values.
  + // Current implementation is conservative - it may miss some cases that may
  + // or may not be vector values. TODO: introduce Unpacks speculatively - remove
  + // them later if they are known to operate on scalar values.

Contributor Author: Moved and adjusted, thanks.

if (vputils::isSingleScalar(Def) || vputils::onlyFirstLaneUsed(Def))
Collaborator: The onlyFirstLaneUsed case may require a single extract rather than a full unpack.
Contributor Author: Yep, I added a comment that the recipes skipped here may or may not produce a vector eventually, and a TODO to introduce unpacks for them as well and remove them once they are guaranteed to produce scalars. For that reason, I left the code as is here for now.

continue;

Collaborator: Worth explaining how scalar users inside replicate regions (continue to) extract their values without going through an Unpack VPInstruction.
Contributor Author: Done and added TODO, thanks!
Collaborator: Thanks! Better placed before the lambda?
Contributor Author: Should be moved, thanks.

// At the moment, we create unpacks only for scalar users outside
// replicate regions. Recipes inside replicate regions still extract the
// required lanes implicitly.
// TODO: Remove once replicate regions are unrolled completely.
auto IsCandidateUnpackUser = [Def](VPUser *U) {
VPRegionBlock *ParentRegion =
cast<VPRecipeBase>(U)->getParent()->getParent();
return U->usesScalars(Def) &&
(!ParentRegion || !ParentRegion->isReplicator());
};
if (none_of(Def->users(), IsCandidateUnpackUser))
continue;

auto *Unpack = new VPInstruction(VPInstruction::Unpack, {Def});
if (R.isPhi())
Unpack->insertBefore(*VPBB, VPBB->getFirstNonPhi());
else
Unpack->insertAfter(&R);
Def->replaceUsesWithIf(Unpack,
[&IsCandidateUnpackUser](VPUser &U, unsigned) {
return IsCandidateUnpackUser(&U);
});
}
}
}
}
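Note: the net effect, as a hedged schematic (names invented, not actual VPlan syntax): a widened def with a scalar user outside replicate regions gets an explicit Unpack between them.

    // before: %w = widen-op ...
    //         %r = replicate-op %w        ; uses scalars of %w
    // after:  %w = widen-op ...
    //         %s = unpack %w
    //         %r = replicate-op %s
    // replicateByVF later lowers %s to VF ExtractElement VPInstructions.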

void VPlanTransforms::materializeVectorTripCount(VPlan &Plan,
7 changes: 4 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -325,9 +325,10 @@ struct VPlanTransforms {
static void materializeBackedgeTakenCount(VPlan &Plan,
VPBasicBlock *VectorPH);

/// Add explicit Build[Struct]Vector recipes that combine multiple scalar
/// values into single vectors.
static void materializeBuildVectors(VPlan &Plan);
/// Add explicit Build[Struct]Vector recipes to Pack multiple scalar values
/// into vectors and Unpack recipes to extract scalars from vectors as
/// needed.
static void materializePacksAndUnpacks(VPlan &Plan);

/// Materialize VF and VFxUF to be computed explicitly using VPInstructions.
static void materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
20 changes: 18 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -466,10 +466,21 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe or
/// VPInstruction) for lane \p Lane. Use \p Def2LaneDefs to look up scalar
/// definitions for operands of \DefR.
Collaborator: Independent: "\p DefR" rather than "\DefR".

static VPRecipeWithIRFlags *
static VPValue *
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
VPRecipeWithIRFlags *DefR, VPLane Lane,
const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
VPValue *Op;
if (match(DefR, m_VPInstruction<VPInstruction::Unpack>(m_VPValue(Op)))) {
auto LaneDefs = Def2LaneDefs.find(Op);
if (LaneDefs != Def2LaneDefs.end())
return LaneDefs->second[Lane.getKnownLane()];

VPValue *Idx =
Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane()));
return Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
}

// Collect the operands at Lane, creating extracts as needed.
SmallVector<VPValue *> NewOps;
for (VPValue *Op : DefR->operands()) {
Expand All @@ -481,6 +492,10 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
continue;
}
if (Lane.getKind() == VPLane::Kind::ScalableLast) {
// Look through mandatory Unpack.
[[maybe_unused]] bool Matched =
match(Op, m_VPInstruction<VPInstruction::Unpack>(m_VPValue(Op)));
assert(Matched && "original op must have been Unpack");
NewOps.push_back(
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
continue;
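Note: a hedged sketch of the result for a fixed VF of 4; cloneForLane reuses per-lane defs from Def2LaneDefs when available and otherwise emits extracts:

    // %u = unpack %vec
    // becomes, after replicateByVF:
    // %u.0 = extractelement %vec, 0
    // %u.1 = extractelement %vec, 1
    // %u.2 = extractelement %vec, 2
    // %u.3 = extractelement %vec, 3
    // For scalable VFs only the last lane is handled, via ExtractLastElement.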
@@ -548,7 +563,8 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
(isa<VPReplicateRecipe>(&R) &&
cast<VPReplicateRecipe>(&R)->isSingleScalar()) ||
(isa<VPInstruction>(&R) &&
!cast<VPInstruction>(&R)->doesGeneratePerAllLanes()))
!cast<VPInstruction>(&R)->doesGeneratePerAllLanes() &&
cast<VPInstruction>(&R)->getOpcode() != VPInstruction::Unpack))
Collaborator: Should Unpack indicate that it doesGeneratePerAllLanes?
Contributor Author: It may not generate values for all lanes, e.g. if only its first lane is used; for now, all cases need to be handled by unrolling.

continue;

auto *DefR = cast<VPRecipeWithIRFlags>(&R);
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -84,6 +84,12 @@ inline bool isSingleScalar(const VPValue *VPV) {
return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
(PreservesUniformity(VPI->getOpcode()) &&
all_of(VPI->operands(), isSingleScalar));
if (isa<VPPartialReductionRecipe>(VPV))
return false;
if (isa<VPReductionRecipe>(VPV))
return true;
if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV))
return Expr->isSingleScalar();

// VPExpandSCEVRecipes must be placed in the entry and are always uniform.
return isa<VPExpandSCEVRecipe>(VPV);
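Note: the partial-reduction check has to come before the reduction check, presumably because VPPartialReductionRecipe derives from VPReductionRecipe: a full reduction collapses its input to a single scalar, while a partial reduction keeps a vector accumulator.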
@@ -241,12 +241,12 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <vscale x 2 x i64> [[TMP23]], i32 0
; CHECK-NEXT: [[TMP24:%.*]] = urem i64 [[INDEX]], [[MUL_2_I]]
; CHECK-NEXT: [[TMP25:%.*]] = udiv i64 [[TMP24]], [[MUL_1_I]]
; CHECK-NEXT: [[TMP26:%.*]] = urem i64 [[TMP24]], [[MUL_1_I]]
; CHECK-NEXT: [[TMP27:%.*]] = udiv i64 [[TMP26]], [[X]]
; CHECK-NEXT: [[TMP28:%.*]] = urem i64 [[TMP26]], [[X]]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <vscale x 2 x i64> [[TMP23]], i32 0
; CHECK-NEXT: [[TMP30:%.*]] = mul i64 [[X]], [[TMP29]]
; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[TMP30]], [[TMP25]]
; CHECK-NEXT: [[TMP32:%.*]] = mul i64 [[TMP31]], [[X]]
@@ -27,15 +27,15 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP2]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x ptr> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <2 x ptr> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP10]])
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]])
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[NEXT_GEP]], align 1
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1
@@ -61,8 +61,8 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x ptr> [[TMP19]], ptr [[NEXT_GEP8]], i32 1
; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <2 x ptr> [[TMP20]], zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP21]], i32 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]])
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP21]], i32 1
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP23]])
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[NEXT_GEP7]], align 1
; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2
@@ -17,6 +17,7 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
@@ -59,7 +60,6 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) {
; CHECK-NEXT: [[TMP24]] = phi <4 x i16> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP24]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP26:%.*]] = sext <4 x i16> [[TMP25]] to <4 x i32>
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[TMP27]]
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP26]], ptr [[TMP28]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
@@ -284,17 +284,17 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[GEP_J]], align 8
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i64> [[STRIDED_VEC]] to <4 x i16>
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[TMP5]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP5]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[K]], i64 [[IV]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[TMP5]], i32 0
; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP6]], align 2
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP5]], i32 1
; CHECK-NEXT: store i16 [[TMP11]], ptr [[TMP7]], align 2
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2
; CHECK-NEXT: store i16 [[TMP12]], ptr [[TMP8]], align 2
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3
; CHECK-NEXT: store i16 [[TMP13]], ptr [[TMP9]], align 2
; CHECK-NEXT: store i64 0, ptr [[A]], align 8
; CHECK-NEXT: store i64 0, ptr [[B]], align 8