Skip to content

Commit a5c7d7d

Browse files
committed
Make alias lane mask a recipe
1 parent 4c3da47 commit a5c7d7d

File tree

4 files changed

+96
-61
lines changed

4 files changed

+96
-61
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8909,9 +8909,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
89098909
*PSE.getSE());
89108910
VPValue *Src = vputils::getOrCreateVPValueForSCEVExpr(*Plan, C.SrcStart,
89118911
*PSE.getSE());
8912-
VPValue *M =
8913-
Builder.createNaryOp(VPInstruction::AliasLaneMask, {Sink, Src}, DL,
8914-
"active.lane.mask.alias");
8912+
VPAliasLaneMaskRecipe *M = new VPAliasLaneMaskRecipe(Src, Sink);
8913+
VecPreheader->appendRecipe(M);
89158914
if (AliasMask)
89168915
AliasMask = Builder.createAnd(AliasMask, M);
89178916
else

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -912,6 +912,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
912912
switch (R->getVPDefID()) {
913913
case VPRecipeBase::VPDerivedIVSC:
914914
case VPRecipeBase::VPEVLBasedIVPHISC:
915+
case VPRecipeBase::VPAliasLaneMaskSC:
915916
case VPRecipeBase::VPExpandSCEVSC:
916917
case VPRecipeBase::VPInstructionSC:
917918
case VPRecipeBase::VPReductionEVLSC:
@@ -1244,7 +1245,6 @@ class VPInstruction : public VPRecipeWithIRFlags {
12441245
SLPLoad,
12451246
SLPStore,
12461247
ActiveLaneMask,
1247-
AliasLaneMask,
12481248
ExplicitVectorLength,
12491249
/// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
12501250
/// The first operand is the incoming value from the predecessor in VPlan,
@@ -2698,6 +2698,44 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
26982698
}
26992699
};
27002700

2701+
// Given a pointer A that is being stored to, and pointer B that is being
2702+
// read from, both with unknown lengths, create a mask that disables
2703+
// elements which could overlap across a loop iteration. For example, if A
2704+
// is X and B is X + 2 with VF being 4, only the final two elements of the
2705+
// loaded vector can be stored since they don't overlap with the stored
2706+
// vector. %b.vec = load %b ; = [s, t, u, v]
2707+
// [...]
2708+
// store %a, %b.vec ; only u and v can be stored as their addresses don't
2709+
// overlap with %a + (VF - 1)
2710+
class VPAliasLaneMaskRecipe : public VPSingleDefRecipe {
2711+
2712+
public:
2713+
VPAliasLaneMaskRecipe(VPValue *Src, VPValue *Sink)
2714+
: VPSingleDefRecipe(VPDef::VPAliasLaneMaskSC, {Src, Sink}) {}
2715+
2716+
~VPAliasLaneMaskRecipe() override = default;
2717+
2718+
VPAliasLaneMaskRecipe *clone() override {
2719+
return new VPAliasLaneMaskRecipe(getSourceValue(), getSinkValue());
2720+
}
2721+
2722+
VP_CLASSOF_IMPL(VPDef::VPAliasLaneMaskSC);
2723+
2724+
void execute(VPTransformState &State) override;
2725+
2726+
/// Get the VPValue* for the pointer being read from
2727+
VPValue *getSourceValue() const { return getOperand(0); }
2728+
2729+
/// Get the VPValue* for the pointer being stored to
2730+
VPValue *getSinkValue() const { return getOperand(1); }
2731+
2732+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2733+
/// Print the recipe.
2734+
void print(raw_ostream &O, const Twine &Indent,
2735+
VPSlotTracker &SlotTracker) const override;
2736+
#endif
2737+
};
2738+
27012739
/// Recipe to expand a SCEV expression.
27022740
class VPExpandSCEVRecipe : public VPSingleDefRecipe {
27032741
const SCEV *Expr;

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 54 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -425,60 +425,6 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
425425
{PredTy, ScalarTC->getType()},
426426
{VIVElem0, ScalarTC}, nullptr, Name);
427427
}
428-
case VPInstruction::AliasLaneMask: {
429-
// Given a pointer A that is being stored to, and pointer B that is being
430-
// read from, both with unknown lengths, create a mask that disables
431-
// elements which could overlap across a loop iteration. For example, if A
432-
// is X and B is X + 2 with VF being 4, only the final two elements of the
433-
// loaded vector can be stored since they don't overlap with the stored
434-
// vector. %b.vec = load %b ; = [s, t, u, v]
435-
// [...]
436-
// store %a, %b.vec ; only u and v can be stored as their addresses don't
437-
// overlap with %a + (VF - 1)
438-
Value *ReadPtr = State.get(getOperand(0), VPIteration(Part, 0));
439-
Value *StorePtr = State.get(getOperand(1), VPIteration(Part, 0));
440-
unsigned ElementSize = 0;
441-
442-
// We expect the operands to the alias mask to be ptrtoint. Sometimes it's
443-
// an add of a ptrtoint.
444-
auto *ReadInsn = cast<Instruction>(ReadPtr);
445-
auto *ReadCast = dyn_cast<CastInst>(ReadPtr);
446-
if (ReadInsn->getOpcode() == Instruction::Add)
447-
ReadCast = dyn_cast<CastInst>(ReadInsn->getOperand(0));
448-
449-
if (ReadCast && ReadCast->getOpcode() == Instruction::PtrToInt) {
450-
Value *Ptr = ReadCast->getOperand(0);
451-
for (auto *Use : Ptr->users()) {
452-
if (auto *GEP = dyn_cast<GetElementPtrInst>(Use)) {
453-
auto *EltVT = GEP->getSourceElementType();
454-
if (EltVT->isArrayTy())
455-
ElementSize = EltVT->getArrayElementType()->getScalarSizeInBits() *
456-
EltVT->getArrayNumElements();
457-
else
458-
ElementSize =
459-
GEP->getSourceElementType()->getScalarSizeInBits() / 8;
460-
break;
461-
}
462-
}
463-
}
464-
assert(ElementSize > 0 && "Couldn't get element size from pointer");
465-
// Calculate how many elements the pointers differ by
466-
Value *Diff = Builder.CreateSub(StorePtr, ReadPtr, "sub.diff");
467-
auto *Type = Diff->getType();
468-
Value *MemEltSize = ConstantInt::get(Type, ElementSize);
469-
Value *DiffDiv = Builder.CreateSDiv(Diff, MemEltSize, "diff");
470-
// If the difference is negative then some elements may alias
471-
Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_SLT, DiffDiv,
472-
ConstantInt::get(Type, 0), "neg.compare");
473-
// Splat the compare result then OR it with a lane mask
474-
Value *Splat = Builder.CreateVectorSplat(State.VF, Cmp);
475-
Value *DiffMask = Builder.CreateIntrinsic(
476-
Intrinsic::get_active_lane_mask,
477-
{VectorType::get(Builder.getInt1Ty(), State.VF), Type},
478-
{ConstantInt::get(Type, 0), DiffDiv}, nullptr, "ptr.diff.lane.mask");
479-
return Builder.CreateBinOp(Instruction::BinaryOps::Or, DiffMask, Splat,
480-
Name);
481-
}
482428
// Count the number of bits set in each lane and reduce the result to a scalar
483429
case VPInstruction::PopCount: {
484430
if (Part != 0)
@@ -895,9 +841,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
895841
case VPInstruction::ResumePhi:
896842
O << "resume-phi";
897843
break;
898-
case VPInstruction::AliasLaneMask:
899-
O << "alias lane mask";
900-
break;
901844
case VPInstruction::PopCount:
902845
O << "popcount";
903846
break;
@@ -2867,6 +2810,60 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
28672810
}
28682811
#endif
28692812

2813+
/// Generate the IR for the alias lane mask and record it as this recipe's
/// value.
///
/// The emitted sequence is:
///   sub.diff           = Source - Sink
///   diff               = sub.diff sdiv ElementSize
///   neg.compare        = diff <s 0
///   ptr.diff.lane.mask = llvm.get.active.lane.mask(0, diff)
///   result             = ptr.diff.lane.mask | splat(neg.compare)
void VPAliasLaneMaskRecipe::execute(VPTransformState &State) {
  // Use the state's builder itself. A by-value IRBuilderBase would be a
  // separate (sliced) copy whose state is not shared with State.Builder.
  IRBuilderBase &Builder = State.Builder;
  // NOTE(review): the trailing `0, true` arguments presumably select part 0 in
  // scalar form, mirroring the State.set call at the end - confirm.
  Value *SinkValue = State.get(getSinkValue(), 0, true);
  Value *SourceValue = State.get(getSourceValue(), 0, true);

  // Derive the element size from the IR feeding the source operand. The
  // operand is expected to be a ptrtoint, or an add whose first operand is a
  // ptrtoint. dyn_cast (rather than cast) lets an unexpected non-instruction
  // value fall through to the descriptive assert below.
  unsigned ElementSize = 0;
  auto *ReadCast = dyn_cast<CastInst>(SourceValue);
  if (auto *ReadInsn = dyn_cast<Instruction>(SourceValue))
    if (ReadInsn->getOpcode() == Instruction::Add)
      ReadCast = dyn_cast<CastInst>(ReadInsn->getOperand(0));

  if (ReadCast && ReadCast->getOpcode() == Instruction::PtrToInt) {
    Value *Ptr = ReadCast->getOperand(0);
    // Take the source element type of the first GEP found among the users of
    // the underlying pointer.
    for (auto *Use : Ptr->users()) {
      if (auto *GEP = dyn_cast<GetElementPtrInst>(Use)) {
        auto *EltVT = GEP->getSourceElementType();
        // NOTE(review): the array branch multiplies a size in bits by the
        // element count, while the other branch divides bits by 8; the units
        // look inconsistent - confirm which is intended.
        if (EltVT->isArrayTy())
          ElementSize = EltVT->getArrayElementType()->getScalarSizeInBits() *
                        EltVT->getArrayNumElements();
        else
          ElementSize = GEP->getSourceElementType()->getScalarSizeInBits() / 8;
        break;
      }
    }
  }
  assert(ElementSize > 0 && "Couldn't get element size from pointer");

  // Calculate how many elements the pointers differ by
  Value *Diff = Builder.CreateSub(SourceValue, SinkValue, "sub.diff");
  auto *Type = Diff->getType();
  Value *MemEltSize = ConstantInt::get(Type, ElementSize);
  Value *DiffDiv = Builder.CreateSDiv(Diff, MemEltSize, "diff");
  // If the difference is negative then some elements may alias
  Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_SLT, DiffDiv,
                                  ConstantInt::get(Type, 0), "neg.compare");
  // Splat the compare result then OR it with a lane mask
  Value *Splat = Builder.CreateVectorSplat(State.VF, Cmp);
  Value *DiffMask = Builder.CreateIntrinsic(
      Intrinsic::get_active_lane_mask,
      {VectorType::get(Builder.getInt1Ty(), State.VF), Type},
      {ConstantInt::get(Type, 0), DiffDiv}, nullptr, "ptr.diff.lane.mask");
  Value *Or = Builder.CreateBinOp(Instruction::BinaryOps::Or, DiffMask, Splat);
  // Publish the vector mask as the value defined by this recipe.
  State.set(this, Or, 0, /*IsScalar=*/false);
}
2856+
2857+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2858+
/// Print the recipe as "<Indent>ALIAS-LANE-MASK <source>, <sink>", with each
/// operand rendered through printAsOperand using \p SlotTracker.
void VPAliasLaneMaskRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  VPValue *Source = getSourceValue();
  VPValue *Sink = getSinkValue();

  O << Indent;
  O << "ALIAS-LANE-MASK ";
  Source->printAsOperand(O, SlotTracker);
  O << ", ";
  Sink->printAsOperand(O, SlotTracker);
}
2865+
#endif
2866+
28702867
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
28712868
assert(!State.Instance && "cannot be used in per-lane");
28722869
const DataLayout &DL = State.CFG.PrevBB->getDataLayout();

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ class VPDef {
338338
using VPRecipeTy = enum {
339339
VPBranchOnMaskSC,
340340
VPDerivedIVSC,
341+
VPAliasLaneMaskSC,
341342
VPExpandSCEVSC,
342343
VPInstructionSC,
343344
VPInterleaveSC,

0 commit comments

Comments
 (0)