Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7189,7 +7189,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
// cost model is complete for better cost estimates.
Comment on lines 7192 to 7193
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this comment still relevant?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, comment indicates that these transforms are still taking place during VPlan execution rather than planning. Perhaps worth rewording to clarify.

VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF);
VPlanTransforms::runPass(VPlanTransforms::materializeBuildVectors, BestVPlan);
VPlanTransforms::runPass(VPlanTransforms::materializePacksAndUnpacks,
BestVPlan);
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
bool HasBranchWeights =
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1064,6 +1064,11 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
ResumeForEpilogue,
/// Returns the value for vscale.
VScale,
/// Extracts all lanes from its (non-scalable) vector operand. This is an
/// abstract VPInstruction whose single defined VPValue represents VF
/// scalars extracted from a vector, to be replaced by VF ExtractElement
/// VPInstructions.
Unpack,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good to place next to BuildStructVector and BuildVector "Pack" cases - which btw are concrete rather than abstract. (An analogous concrete Unpack which defines VF distinct VPValues could be introduced only after replicating by VF.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved, thanks

};

/// Returns true if this VPInstruction generates scalar values for all lanes.
Expand Down Expand Up @@ -2718,6 +2723,15 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
return R && classof(R);
}

static inline bool classof(const VPValue *VPV) {
const VPRecipeBase *R = VPV->getDefiningRecipe();
return R && classof(R);
}

static inline bool classof(const VPSingleDefRecipe *R) {
return classof(static_cast<const VPRecipeBase *>(R));
}

Comment on lines +2725 to +2733
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, related to the introduction of unpack?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's needed for this change, due to the new cast/dyn_cast/isa with VPSingleDefRecipe

/// Generate the reduction in the loop.
void execute(VPTransformState &State) override;

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::AnyOf:
case VPInstruction::BuildStructVector:
case VPInstruction::BuildVector:
case VPInstruction::Unpack:
return SetResultTyFromOp();
case VPInstruction::ExtractLane:
return inferScalarType(R->getOperand(1));
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,12 @@ m_ExtractLastElement(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::ExtractLastElement>(Op0);
}

template <typename Op0_t, typename Op1_t>
inline VPInstruction_match<Instruction::ExtractElement, Op0_t, Op1_t>
m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1) {
return m_VPInstruction<Instruction::ExtractElement>(Op0, Op1);
}

template <typename Op0_t, typename Op1_t, typename Op2_t>
inline VPInstruction_match<VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t>
m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::FirstActiveLane:
case VPInstruction::Not:
case VPInstruction::Unpack:
return 1;
case Instruction::ICmp:
case Instruction::FCmp:
Expand Down Expand Up @@ -1240,6 +1241,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::StepVector:
case VPInstruction::ReductionStartVector:
case VPInstruction::VScale:
case VPInstruction::Unpack:
return false;
default:
return true;
Expand Down Expand Up @@ -1284,7 +1286,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
case VPInstruction::WidePtrAdd:
return Op == getOperand(0);
// WidePtrAdd supports scalar and vector base addresses.
return false;
case VPInstruction::ComputeAnyOfResult:
case VPInstruction::ComputeFindIVResult:
return Op == getOperand(1);
Expand Down Expand Up @@ -1408,6 +1411,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ResumeForEpilogue:
O << "resume-for-epilogue";
break;
case VPInstruction::Unpack:
O << "unpack";
break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
Expand Down
51 changes: 50 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1223,6 +1223,13 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
}

uint64_t Idx;
if (match(&R, m_ExtractElement(m_BuildVector(), m_ConstantInt(Idx)))) {
auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could m_BuildVector(Op0) provide the desired operand?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I think it could, but would be inconsistent with other matchers, where the inner matcher match the operands.

Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
return;
}

if (auto *Phi = dyn_cast<VPPhi>(Def)) {
if (Phi->getNumOperands() == 1)
Phi->replaceAllUsesWith(Phi->getOperand(0));
Expand Down Expand Up @@ -3740,7 +3747,7 @@ void VPlanTransforms::materializeBackedgeTakenCount(VPlan &Plan,
BTC->replaceAllUsesWith(TCMO);
}

void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
void VPlanTransforms::materializePacksAndUnpacks(VPlan &Plan) {
if (Plan.hasScalarVFOnly())
return;

Expand Down Expand Up @@ -3789,6 +3796,48 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
});
}
}

// Create explicit VPInstructions to convert vectors to scalars.
for (VPBasicBlock *VPBB : VPBBsInsideLoopRegion) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
if (isa<VPReplicateRecipe, VPInstruction, VPScalarIVStepsRecipe,
VPDerivedIVRecipe, VPCanonicalIVPHIRecipe>(&R))
continue;
for (VPValue *Def : R.definedValues()) {
// Skip recipes that are single-scalar or only have their first lane
// used.
// TODO: The Defs skipped here may or may not be vector values.
// Introduce Unpacks, and remove them later, if they are guaranteed to
// produce scalar values.
Comment on lines +3810 to +3812
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be placed above - next to "Create explicit...", to also capture skipping VPInstructions?

Suggested change
// TODO: The Defs skipped here may or may not be vector values.
// Introduce Unpacks, and remove them later, if they are guaranteed to
// produce scalar values.
// Current implementation is conservative - it may miss some cases that may
// or may not be vector values. TODO: introduce Unpacks speculatively - remove
// them later if they are known to operate on scalar values.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved and adjusted, thanks

if (vputils::isSingleScalar(Def) || vputils::onlyFirstLaneUsed(Def))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The onlyFirstLaneUsed case may require a single extract rather than full unpack.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, I added a comment that the recipes skipped here may or may not produce a vector eventually and a TODO to introduce unpacks for them as well, and remove them once we are guaranteed to produce scalars.

For that reason, I left the code as is here for now.

continue;

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth explaining how scalar users inside replicate regions (continue to) extract their values w/o going through an Unpack VPInstruction.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done and added TODO, thanks!

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! Better placed before the lambda?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be moved, thanks

// At the moment, we only create unpacks for scalar users outside
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// At the moment, we only create unpacks for scalar users outside
// At the moment, we create unpacks only for scalar users outside

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks

// replicate regions. Recipes inside replicate regions still manually
// extract the required lanes.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// replicate regions. Recipes inside replicate regions still manually
// extract the required lanes.
// replicate regions. Recipes inside replicate regions still extract the
// required lanes implicitly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated thanks

// TODO: Remove once replicate regions are
// unrolled completely.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// TODO: Remove once replicate regions are
// unrolled completely.
// TODO: Remove once replicate regions are unrolled completely.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done thanks

auto IsCandidateUnpackUser = [Def](VPUser *U) {
VPRegionBlock *ParentRegion =
cast<VPRecipeBase>(U)->getParent()->getParent();
return U->usesScalars(Def) &&
(!ParentRegion || !ParentRegion->isReplicator());
};
if (none_of(Def->users(), IsCandidateUnpackUser))
continue;

auto *Unpack = new VPInstruction(VPInstruction::Unpack, {Def});
if (R.isPhi())
Unpack->insertBefore(*VPBB, VPBB->getFirstNonPhi());
else
Unpack->insertAfter(&R);
Def->replaceUsesWithIf(Unpack,
[&IsCandidateUnpackUser](VPUser &U, unsigned) {
return IsCandidateUnpackUser(&U);
});
}
}
}
}

void VPlanTransforms::materializeVectorTripCount(VPlan &Plan,
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,8 +326,9 @@ struct VPlanTransforms {
VPBasicBlock *VectorPH);

/// Add explicit Build[Struct]Vector recipes that combine multiple scalar
/// values into single vectors.
static void materializeBuildVectors(VPlan &Plan);
/// values into single vectors and Unpack recipes to extract scalars from a
/// vector as needed.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  /// Add explicit Build[Struct]Vector recipes to Pack multiple scalar values
  /// into vectors and Unpack recipes to extract scalars from vectors as needed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks!

static void materializePacksAndUnpacks(VPlan &Plan);

/// Materialize VF and VFxUF to be computed explicitly using VPInstructions.
static void materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
Expand Down
20 changes: 18 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -466,10 +466,21 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe or
/// VPInstruction) for lane \p Lane. Use \p Def2LaneDefs to look up scalar
/// definitions for operands of \DefR.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Independent: "\p DefR" rather than "\DefR"

static VPRecipeWithIRFlags *
static VPValue *
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
VPRecipeWithIRFlags *DefR, VPLane Lane,
const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
VPValue *Op;
if (match(DefR, m_VPInstruction<VPInstruction::Unpack>(m_VPValue(Op)))) {
auto LaneDefs = Def2LaneDefs.find(Op);
if (LaneDefs != Def2LaneDefs.end())
return LaneDefs->second[Lane.getKnownLane()];

VPValue *Idx =
Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane()));
return Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
}

// Collect the operands at Lane, creating extracts as needed.
SmallVector<VPValue *> NewOps;
for (VPValue *Op : DefR->operands()) {
Expand All @@ -481,6 +492,10 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
continue;
}
if (Lane.getKind() == VPLane::Kind::ScalableLast) {
// Look through mandatory Unpack.
[[maybe_unused]] bool Matched =
match(Op, m_VPInstruction<VPInstruction::Unpack>(m_VPValue(Op)));
assert(Matched && "original op must have been Unpack");
NewOps.push_back(
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
continue;
Expand Down Expand Up @@ -548,7 +563,8 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
(isa<VPReplicateRecipe>(&R) &&
cast<VPReplicateRecipe>(&R)->isSingleScalar()) ||
(isa<VPInstruction>(&R) &&
!cast<VPInstruction>(&R)->doesGeneratePerAllLanes()))
!cast<VPInstruction>(&R)->doesGeneratePerAllLanes() &&
cast<VPInstruction>(&R)->getOpcode() != VPInstruction::Unpack))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should Unpack indicate that it doesGeneratePerAllLanes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It may not generate for all lanes at the moment, if only its first lane is used and all cases need to be handled by unrolling at the moment.

continue;

auto *DefR = cast<VPRecipeWithIRFlags>(&R);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,12 +241,12 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <vscale x 2 x i64> [[TMP23]], i32 0
; CHECK-NEXT: [[TMP24:%.*]] = urem i64 [[INDEX]], [[MUL_2_I]]
; CHECK-NEXT: [[TMP25:%.*]] = udiv i64 [[TMP24]], [[MUL_1_I]]
; CHECK-NEXT: [[TMP26:%.*]] = urem i64 [[TMP24]], [[MUL_1_I]]
; CHECK-NEXT: [[TMP27:%.*]] = udiv i64 [[TMP26]], [[X]]
; CHECK-NEXT: [[TMP28:%.*]] = urem i64 [[TMP26]], [[X]]
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <vscale x 2 x i64> [[TMP23]], i32 0
; CHECK-NEXT: [[TMP30:%.*]] = mul i64 [[X]], [[TMP29]]
; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[TMP30]], [[TMP25]]
; CHECK-NEXT: [[TMP32:%.*]] = mul i64 [[TMP31]], [[X]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP2]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x ptr> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <2 x ptr> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP10]])
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]])
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[NEXT_GEP]], align 1
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1
Expand All @@ -61,8 +61,8 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x ptr> [[TMP19]], ptr [[NEXT_GEP8]], i32 1
; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <2 x ptr> [[TMP20]], zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP21]], i32 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]])
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP21]], i32 1
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP23]])
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[NEXT_GEP7]], align 1
; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
Expand Down Expand Up @@ -59,7 +60,6 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) {
; CHECK-NEXT: [[TMP24]] = phi <4 x i16> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP24]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP26:%.*]] = sext <4 x i16> [[TMP25]] to <4 x i32>
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[TMP27]]
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP26]], ptr [[TMP28]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,17 +284,17 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[GEP_J]], align 8
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i64> [[STRIDED_VEC]] to <4 x i16>
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[TMP5]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP5]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[K]], i64 [[IV]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[TMP5]], i32 0
; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP6]], align 2
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP5]], i32 1
; CHECK-NEXT: store i16 [[TMP11]], ptr [[TMP7]], align 2
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2
; CHECK-NEXT: store i16 [[TMP12]], ptr [[TMP8]], align 2
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3
; CHECK-NEXT: store i16 [[TMP13]], ptr [[TMP9]], align 2
; CHECK-NEXT: store i64 0, ptr [[A]], align 8
; CHECK-NEXT: store i64 0, ptr [[B]], align 8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -205,25 +205,25 @@ define void @test_load_gep_widen_induction(ptr noalias %dst, ptr noalias %dst2)
; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i64> [[STEP_ADD_2]], splat (i64 2)
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr [[DST]], <2 x i64> [[VEC_IND]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i128, ptr [[DST]], <2 x i64> [[STEP_ADD]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i128, ptr [[DST]], <2 x i64> [[STEP_ADD_2]]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x ptr> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x ptr> [[TMP2]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i128, ptr [[DST]], <2 x i64> [[STEP_ADD_3]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0
; CHECK-NEXT: store ptr null, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 1
; CHECK-NEXT: store ptr null, ptr [[TMP5]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
; CHECK-NEXT: store ptr null, ptr [[TMP6]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
; CHECK-NEXT: store ptr null, ptr [[TMP7]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP2]], i32 0
; CHECK-NEXT: store ptr null, ptr [[TMP8]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x ptr> [[TMP2]], i32 1
; CHECK-NEXT: store ptr null, ptr [[TMP9]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0
; CHECK-NEXT: store ptr null, ptr [[TMP10]], align 8
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 1
; CHECK-NEXT: store ptr null, ptr [[TMP11]], align 8
; CHECK-NEXT: store ptr null, ptr [[TMP17]], align 8
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This store to TMP17 replaces the one removed above to TMP4?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, ok: this store to TMP17 replaces the one to TMP11, which is retained but TMP11 is redefined.

Wonder if some simple "sink each ExtractElement to before its single/first user" pass could help confirm all test differences?

; CHECK-NEXT: [[TMP12:%.*]] = getelementptr ptr, ptr [[DST2]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr ptr, ptr [[TMP12]], i32 2
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr ptr, ptr [[TMP12]], i32 4
Expand Down
Loading
Loading