Skip to content
26 changes: 5 additions & 21 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,9 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
LastLane = 0;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Independent: above comment deserves updating, as this method no longer has a Part parameter - "// If Values have been set for this Def return the one relevant for \p Part."

}

assert(IsSingleScalar && "must be a single-scalar at this point");
// We need to construct the vector value for a single-scalar value by
// broadcasting the scalar to all lanes.
Comment on lines +346 to +347
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// We need to construct the vector value for a single-scalar value by
// broadcasting the scalar to all lanes.
// We need to construct the vector value for a single-scalar value by
// broadcasting the scalar to all lanes.
// TODO: Replace by introducing Broadcast VPInstructions.

?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done thanks

auto *LastInst = cast<Instruction>(get(Def, LastLane));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: better sink the definition of LastInst so that all comments appear together.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done thanks

// Set the insert point after the last scalarized instruction or after the
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
Expand All @@ -343,27 +346,8 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
: std::next(BasicBlock::iterator(LastInst));
Builder.SetInsertPoint(&*NewIP);

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed thanks

// However, if we are vectorizing, we need to construct the vector values.
// If the value is known to be uniform after vectorization, we can just
// broadcast the scalar value corresponding to lane zero. Otherwise, we
// construct the vector values using insertelement instructions. Since the
// resulting vectors are stored in State, we will only generate the
// insertelements once.
Value *VectorValue = nullptr;
if (IsSingleScalar) {
VectorValue = GetBroadcastInstrs(ScalarValue);
set(Def, VectorValue);
} else {
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
assert(isa<VPInstruction>(Def) &&
"Explicit BuildVector recipes must have"
"handled packing for non-VPInstructions.");
// Initialize packing with insertelements to start from poison.
VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
VectorValue = packScalarIntoVectorizedValue(Def, VectorValue, Lane);
set(Def, VectorValue);
}
Value *VectorValue = GetBroadcastInstrs(ScalarValue);
set(Def, VectorValue);
Builder.restoreIP(OldIP);
return VectorValue;
}
Expand Down
21 changes: 9 additions & 12 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,8 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
return R && classof(R);
}

virtual VPRecipeWithIRFlags *clone() override = 0;

void execute(VPTransformState &State) override = 0;

/// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
Expand Down Expand Up @@ -1045,13 +1047,6 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
VScale,
};

private:
typedef unsigned char OpcodeTy;
OpcodeTy Opcode;

/// An optional name that can be used for the generated IR instruction.
const std::string Name;

/// Returns true if this VPInstruction generates scalar values for all lanes.
/// Most VPInstructions generate a single value per part, either vector or
/// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
Expand All @@ -1060,6 +1055,13 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
/// underlying ingredient.
bool doesGeneratePerAllLanes() const;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doesGeneratePerAllLanes() moved from private to public.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep


private:
typedef unsigned char OpcodeTy;
OpcodeTy Opcode;

/// An optional name that can be used for the generated IR instruction.
const std::string Name;

/// Returns true if we can generate a scalar for the first lane only if
/// needed.
bool canGenerateScalarForFirstLane() const;
Expand All @@ -1069,11 +1071,6 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
/// existing value is returned rather than a generated one.
Value *generate(VPTransformState &State);

/// Utility methods serving execute(): generates a scalar single instance of
/// the modeled instruction for a given lane. \returns the scalar generated
/// value for lane \p Lane.
Value *generatePerLane(VPTransformState &State, const VPLane &Lane);

#if !defined(NDEBUG)
/// Return the number of operands determined by the opcode of the
/// VPInstruction. Returns -1u if the number of operands cannot be determined
Expand Down
30 changes: 6 additions & 24 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -525,16 +525,6 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
}
}

Value *VPInstruction::generatePerLane(VPTransformState &State,
const VPLane &Lane) {
IRBuilderBase &Builder = State.Builder;

assert(getOpcode() == VPInstruction::PtrAdd &&
"only PtrAdd opcodes are supported for now");
return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
State.get(getOperand(1), Lane), Name);
}

/// Create a conditional branch using \p Cond branching to the successors of \p
/// VPBB. Note that the first successor is always forward (i.e. not created yet)
/// while the second successor may already have been created (if it is a header
Expand Down Expand Up @@ -1154,24 +1144,13 @@ void VPInstruction::execute(VPTransformState &State) {
"Set flags not supported for the provided opcode");
if (hasFastMathFlags())
State.Builder.setFastMathFlags(getFastMathFlags());
bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
(vputils::onlyFirstLaneUsed(this) ||
isVectorToScalar() || isSingleScalar());
bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
if (GeneratesPerAllLanes) {
for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue();
Lane != NumLanes; ++Lane) {
Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
assert(GeneratedValue && "generatePerLane must produce a value");
State.set(this, GeneratedValue, VPLane(Lane));
}
return;
}
Comment on lines -1203 to -1212
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is dropped due to earlier replication of VPInstructions by VF.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep


Value *GeneratedValue = generate(State);
if (!hasResult())
return;
assert(GeneratedValue && "generate must produce a value");
bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
(vputils::onlyFirstLaneUsed(this) ||
isVectorToScalar() || isSingleScalar());
assert((((GeneratedValue->getType()->isVectorTy() ||
GeneratedValue->getType()->isStructTy()) ==
!GeneratesPerFirstLaneOnly) ||
Expand Down Expand Up @@ -1244,6 +1223,9 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::Broadcast:
case VPInstruction::ReductionStartVector:
return true;
case VPInstruction::BuildStructVector:
case VPInstruction::BuildVector:
return getNumOperands() > 1;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are Build*Vector with a single operand useful, do they construct single element vectors?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All build-vectors start out as single operand, which will get expanded once we replicate by VF

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and this determines if onlyFirstLaneUsed or not? Worth a comment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added thanks

case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
case VPInstruction::WidePtrAdd:
Expand Down
32 changes: 19 additions & 13 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3377,34 +3377,40 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
vp_depth_first_shallow(Plan.getEntry()));
auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(LoopRegion->getEntry()));
// Materialize Build(Struct)Vector for all replicating VPReplicateRecipes,
// excluding ones in replicate regions. Those are not materialized explicitly
// yet. Those vector users are still handled in VPReplicateRegion::execute(),
// via shouldPack().
// Materialize Build(Struct)Vector for all replicating VPReplicateRecipes and
// VPInstructions, excluding ones in replicate regions. Those are not
// materialized explicitly yet. Those vector users are still handled in
// VPReplicateRegion::execute(), via shouldPack().
// TODO: materialize build vectors for replicating recipes in replicating
// regions.
// TODO: materialize build vectors for VPInstructions.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// TODO: materialize build vectors for VPInstructions.

?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed thanks

for (VPBasicBlock *VPBB :
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
auto UsesVectorOrInsideReplicateRegion = [RepR, LoopRegion](VPUser *U) {
auto *DefR = dyn_cast<VPRecipeWithIRFlags>(&R);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
auto *DefR = dyn_cast<VPRecipeWithIRFlags>(&R);
auto *DefR = cast<VPRecipeWithIRFlags>(&R);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doen thanks

if (!DefR || !isa<VPReplicateRecipe, VPInstruction>(DefR))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (!DefR || !isa<VPReplicateRecipe, VPInstruction>(DefR))
if (!isa_and_present<VPReplicateRecipe, VPInstruction>(DefR))

although slightly longer...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated as suggested just below

continue;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
auto *DefR = dyn_cast<VPRecipeWithIRFlags>(&R);
if (!DefR || !isa<VPReplicateRecipe, VPInstruction>(DefR))
continue;
if (!isa<VPReplicateRecipe, VPInstruction>(&R))
continue;
auto *DefR = cast<VPRecipeWithIRFlags>(&R);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks!

auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
VPRegionBlock *ParentRegion =
cast<VPRecipeBase>(U)->getParent()->getParent();
return !U->usesScalars(RepR) || ParentRegion != LoopRegion;
return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
};
if (!RepR || RepR->isSingleScalar() ||
none_of(RepR->users(), UsesVectorOrInsideReplicateRegion))
if ((isa<VPReplicateRecipe>(DefR) &&
cast<VPReplicateRecipe>(DefR)->isSingleScalar()) ||
(isa<VPInstruction>(DefR) &&
!cast<VPInstruction>(DefR)->doesGeneratePerAllLanes()) ||
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should isSingleScalar() and doesGeneratePerAllLanes be combined into a common method of VPRecipeWithIRFlags? (if not VPSingleDefRecipe)

Should the two merge, i.e., by converting replicated recipes into VPInstructions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After replicating by VF, we can replace single-scalar VPReplicateRecipes by VPInstructions (or even create them directly instead of single-scalar VPReplicateRecipes). This will probably also require explicit unpacking first (#155670).

To do so before replicating by VF, we would probably need to encode single-scalar in VPInstruction (or VPInstructionWithType) #140623.

Some VPReplicateRecipes cannnot be expressed easily by VPInstructions at the moment, including load/stores, and calls.

vputils::onlyFirstLaneUsed(DefR) ||
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How/Is the additional check of onlyFirstLaneUsed() related to DefR being a VPInstruction?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

VPReplicateRecipes encode single-scalarness in the recipe, whereas for VPInstructions we currently have to infer it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so should the onlyFirstLaneUsed(DefR) be placed under the isa<VPInstruction>(DefR) &&?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, done thanks

none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
continue;

Type *ScalarTy = TypeInfo.inferScalarType(RepR);
Type *ScalarTy = TypeInfo.inferScalarType(DefR);
unsigned Opcode = ScalarTy->isStructTy()
? VPInstruction::BuildStructVector
: VPInstruction::BuildVector;
auto *BuildVector = new VPInstruction(Opcode, {RepR});
BuildVector->insertAfter(RepR);
auto *BuildVector = new VPInstruction(Opcode, {DefR});
BuildVector->insertAfter(DefR);

RepR->replaceUsesWithIf(
DefR->replaceUsesWithIf(
BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
VPUser &U, unsigned) {
return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,8 @@ struct VPlanTransforms {
/// Explicitly unroll \p Plan by \p UF.
static void unrollByUF(VPlan &Plan, unsigned UF);

/// Replace each VPReplicateRecipe outside on any replicate region in \p Plan
/// with \p VF single-scalar recipes.
/// Replace each VPReplicateRecipe and replicating VPInstruction outside on
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/// Replace each VPReplicateRecipe and replicating VPInstruction outside on
/// Replace each replicating VPReplicateRecipe and VPInstruction outside of

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done thanks

/// any replicate region in \p Plan with \p VF single-scalar recipes.
/// TODO: Also replicate VPReplicateRecipes inside replicate regions, thereby
/// dissolving the latter.
static void replicateByVF(VPlan &Plan, ElementCount VF);
Expand Down
69 changes: 41 additions & 28 deletions llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -463,15 +463,15 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
VPlanTransforms::removeDeadRecipes(Plan);
}

/// Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
/// Def2LaneDefs to look up scalar definitions for operands of \RepR.
static VPReplicateRecipe *
/// Create a single-scalar clone of \p DefR for lane \p Lane. Use \p
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth documenting and/or asserting that DefR is expected to be either a replicating recipe or a VPInstruction.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added comment and assert in the else below

/// Def2LaneDefs to look up scalar definitions for operands of \DefR.
static VPRecipeWithIRFlags *
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
VPReplicateRecipe *RepR, VPLane Lane,
VPRecipeWithIRFlags *DefR, VPLane Lane,
const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
// Collect the operands at Lane, creating extracts as needed.
SmallVector<VPValue *> NewOps;
for (VPValue *Op : RepR->operands()) {
for (VPValue *Op : DefR->operands()) {
// If Op is a definition that has been unrolled, directly use the clone for
// the corresponding lane.
auto LaneDefs = Def2LaneDefs.find(Op);
Expand Down Expand Up @@ -501,11 +501,19 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
NewOps.push_back(Ext);
}

auto *New =
new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
/*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
New->transferFlags(*RepR);
New->insertBefore(RepR);
VPRecipeWithIRFlags *New;
if (auto *RepR = dyn_cast<VPReplicateRecipe>(DefR)) {
New =
new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
/*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
Comment on lines +511 to +512
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: have cloning of replicate recipes also provide the desired result coupled with setting its operands to NewOps (deriving IsSingleScalar and Mask from the operands?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added thanks

} else {
New = DefR->clone();
for (const auto &[Idx, Op] : enumerate(NewOps)) {
New->setOperand(Idx, Op);
}
}
New->transferFlags(*DefR);
New->insertBefore(DefR);
return New;
}

Expand All @@ -530,41 +538,46 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
SmallVector<VPRecipeBase *> ToRemove;
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
if (!RepR || RepR->isSingleScalar())
auto *DefR = dyn_cast<VPRecipeWithIRFlags>(&R);
if (!DefR || !isa<VPInstruction, VPReplicateRecipe>(DefR))
continue;
if ((isa<VPReplicateRecipe>(DefR) &&
cast<VPReplicateRecipe>(DefR)->isSingleScalar()) ||
(isa<VPInstruction>(DefR) &&
!cast<VPInstruction>(DefR)->doesGeneratePerAllLanes()))
continue;

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
auto *DefR = dyn_cast<VPRecipeWithIRFlags>(&R);
if (!DefR || !isa<VPInstruction, VPReplicateRecipe>(DefR))
continue;
if ((isa<VPReplicateRecipe>(DefR) &&
cast<VPReplicateRecipe>(DefR)->isSingleScalar()) ||
(isa<VPInstruction>(DefR) &&
!cast<VPInstruction>(DefR)->doesGeneratePerAllLanes()))
continue;
if (!isa<VPInstruction, VPReplicateRecipe>(&R)) ||
(isa<VPReplicateRecipe>(&R) &&
cast<VPReplicateRecipe>(&R)->isSingleScalar()) ||
(isa<VPInstruction>(&R) &&
!cast<VPInstruction>(&R)->doesGeneratePerAllLanes()))
continue;
auto *DefR = cast<VPRecipeWithIRFlags>(&R);

VPBuilder Builder(RepR);
if (RepR->getNumUsers() == 0) {
if (isa<StoreInst>(RepR->getUnderlyingInstr()) &&
vputils::isSingleScalar(RepR->getOperand(1))) {
VPBuilder Builder(DefR);
if (DefR->getNumUsers() == 0) {
if (isa<StoreInst>(DefR->getUnderlyingInstr()) &&
vputils::isSingleScalar(DefR->getOperand(1))) {
// Stores to invariant addresses need to store the last lane only.
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF(VF),
cloneForLane(Plan, Builder, IdxTy, DefR, VPLane::getLastLaneForVF(VF),
Def2LaneDefs);
} else {
// Create single-scalar version of RepR for all lanes.
// Create single-scalar version of DefR for all lanes.
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs);
cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs);
}
RepR->eraseFromParent();
DefR->eraseFromParent();
continue;
}
/// Create single-scalar version of RepR for all lanes.
/// Create single-scalar version of DefR for all lanes.
SmallVector<VPValue *> LaneDefs;
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
LaneDefs.push_back(
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs));
cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs));

Def2LaneDefs[RepR] = LaneDefs;
Def2LaneDefs[DefR] = LaneDefs;
/// Users that only demand the first lane can use the definition for lane
/// 0.
RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
return U.onlyFirstLaneUsed(RepR);
DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) {
return U.onlyFirstLaneUsed(DefR);
});

// Update each build vector user that currently has RepR as its only
// Update each build vector user that currently has DefR as its only
// operand, to have all LaneDefs as its operands.
for (VPUser *U : to_vector(RepR->users())) {
for (VPUser *U : to_vector(DefR->users())) {
auto *VPI = dyn_cast<VPInstruction>(U);
if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
VPI->getOpcode() != VPInstruction::BuildStructVector))
Expand All @@ -576,7 +589,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
for (VPValue *LaneDef : drop_begin(LaneDefs))
VPI->addOperand(LaneDef);
}
ToRemove.push_back(RepR);
ToRemove.push_back(DefR);
}
}
for (auto *R : reverse(ToRemove))
Expand Down
7 changes: 4 additions & 3 deletions llvm/test/Transforms/LoopVectorize/pointer-induction.ll
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ define void @a(ptr readnone %b) {
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]]
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x ptr> [[TMP21]], ptr [[NEXT_GEP2]], i32 1
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x ptr> [[TMP22]], ptr [[NEXT_GEP3]], i32 2
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x ptr> [[TMP23]], ptr [[NEXT_GEP4]], i32 3
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How/are these inserts (and those below) related to the patch, perhaps to dropping the packing from VPTransformState::get()?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If ther eare users in replicate regions, we will materialize explicit build-vectors for those users, but we will look thourgh them during codegen

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potential for being cleaned up when replicate regions will be replicated by VF?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes exactly

; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3
Expand Down Expand Up @@ -649,9 +653,6 @@ define i64 @ivopt_widen_ptr_indvar_3(ptr noalias %a, i64 %stride, i64 %n) {
; STRIDED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], [[TMP8]]
; STRIDED-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP1]]
; STRIDED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], [[TMP10]]
; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP5]]
; STRIDED-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr null, i64 [[TMP7]]
; STRIDED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]]
Comment on lines -652 to -654
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DCE or generation avoided now?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this will be DCE'd afte unrolling and only the last lane being used

; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
; STRIDED-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[A:%.*]], i64 [[INDEX]]
; STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP12]], align 8
Expand Down
Loading