5 changes: 5 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -852,6 +852,11 @@ class TargetTransformInfo {
/// Return true if the target supports strided load.
LLVM_ABI bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const;

/// Return true if the target benefits from the generation of a more
/// efficient instruction sequence for strided accesses.
LLVM_ABI bool preferToUseStrideRecipesForVectorization(Type *DataType,
Align Alignment) const;

/// Return true if the target supports interleaved access for the given vector
/// type \p VTy, interleave factor \p Factor, alignment \p Alignment and
/// address space \p AddrSpace.
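For context, a transform that wants to form strided accesses would consult this hook together with the existing legality query. A minimal caller-side sketch; the helper name and gating logic are assumptions for illustration, not code from this patch:

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Sketch: decide whether a widened gather of DataTy should become a strided
// access: either the target has a native strided load/store, or it reports
// that a synthesized strided sequence beats the gather.
static bool shouldUseStridedAccess(const TargetTransformInfo &TTI,
                                   Type *DataTy, Align Alignment) {
  return TTI.isLegalStridedLoadStore(DataTy, Alignment) ||
         TTI.preferToUseStrideRecipesForVectorization(DataTy, Alignment);
}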
5 changes: 5 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -374,6 +374,11 @@ class TargetTransformInfoImplBase {
return false;
}

virtual bool preferToUseStrideRecipesForVectorization(Type *DataType,
Align Alignment) const {
return false;
}

virtual bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
Align Alignment,
unsigned AddrSpace) const {
5 changes: 5 additions & 0 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -532,6 +532,11 @@ bool TargetTransformInfo::isLegalStridedLoadStore(Type *DataType,
return TTIImpl->isLegalStridedLoadStore(DataType, Alignment);
}

bool TargetTransformInfo::preferToUseStrideRecipesForVectorization(
Type *DataType, Align Alignment) const {
return TTIImpl->preferToUseStrideRecipesForVectorization(DataType, Alignment);
}

bool TargetTransformInfo::isLegalInterleavedAccessType(
VectorType *VTy, unsigned Factor, Align Alignment,
unsigned AddrSpace) const {
6 changes: 6 additions & 0 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -346,6 +346,12 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
return isLegalMaskedGatherScatter(DataType);
}

bool
preferToUseStrideRecipesForVectorization(Type *DataType,
Align Alignment) const override {
return isLegalMaskedGatherScatter(DataType);
}

bool isLegalBroadcastLoad(Type *ElementTy,
ElementCount NumElements) const override {
// Return true if we can generate a `ld1r` splat load instruction.
35 changes: 26 additions & 9 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3956,7 +3956,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
[](const auto *R) { return Instruction::Select; })
.Case<VPWidenStoreRecipe>(
[](const auto *R) { return Instruction::Store; })
.Case<VPWidenLoadRecipe>(
.Case<VPWidenLoadRecipe, VPWidenStridedLoadRecipe>(
[](const auto *R) { return Instruction::Load; })
.Case<VPWidenCallRecipe, VPWidenIntrinsicRecipe>(
[](const auto *R) { return Instruction::Call; })
@@ -4056,6 +4056,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPReductionPHISC:
case VPDef::VPInterleaveEVLSC:
case VPDef::VPInterleaveSC:
case VPDef::VPWidenStridedLoadSC:
case VPDef::VPWidenLoadEVLSC:
case VPDef::VPWidenLoadSC:
case VPDef::VPWidenStoreEVLSC:
@@ -6940,6 +6941,12 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
RepR->getUnderlyingInstr(), VF))
return true;
}

// The strided load is created from a gather by a VPlan transform, and its
// cost will be lower than that of the original gather.
if (isa<VPWidenStridedLoadRecipe>(&R))
return true;

if (Instruction *UI = GetInstructionForCost(&R)) {
// If we adjusted the predicate of the recipe, the cost in the legacy
// cost model may be different.
@@ -7495,7 +7502,10 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
new VPVectorEndPointerRecipe(Ptr, &Plan.getVF(), getLoadStoreType(I),
/*Stride*/ -1, Flags, I->getDebugLoc());
} else {
VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
const DataLayout &DL = I->getDataLayout();
auto *StrideTy = DL.getIndexType(Ptr->getUnderlyingValue()->getType());
VPValue *StrideOne = Plan.getOrAddLiveIn(ConstantInt::get(StrideTy, 1));
VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I), StrideOne,
GEP ? GEP->getNoWrapFlags()
: GEPNoWrapFlags::none(),
I->getDebugLoc());
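The consecutive-access path above now threads an explicit unit stride into VPVectorPointerRecipe. Conceptually the per-part pointer advances by Part * VF * Stride elements; a sketch of that address computation under assumed names (the recipe's actual codegen lives in VPVectorPointerRecipe::execute):

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Sketch: pointer for unroll part `Part` of an access with element stride
// `Stride`, i.e. Ptr + Part * VF * Stride elements of ElemTy. Stride == 1
// reproduces the old consecutive behavior.
static Value *emitPartPointer(IRBuilderBase &B, Type *ElemTy, Value *Ptr,
                              Value *RuntimeVF, Value *Stride, Value *Part) {
  Value *Offset = B.CreateMul(Part, B.CreateMul(RuntimeVF, Stride));
  return B.CreateGEP(ElemTy, Ptr, Offset, "vector.gep");
}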
@@ -8592,19 +8602,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
*Plan))
return nullptr;

VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind);
// Transform recipes to abstract recipes if legal and beneficial, and clamp
// the range for better cost estimation.
// TODO: Enable the following transform once the EVL versions of
// extended-reduction and mulacc-reduction are implemented.
if (!CM.foldTailWithEVL()) {
VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind);
if (!CM.foldTailWithEVL())
VPlanTransforms::runPass(VPlanTransforms::convertToAbstractRecipes, *Plan,
CostCtx, Range);
}

for (ElementCount VF : Range)
Plan->addVF(VF);
Plan->setName("Initial VPlan");

// Interleave memory: for each Interleave Group we marked earlier as relevant
// for this VPlan, replace the Recipes widening its memory instructions with a
@@ -8617,6 +8622,18 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
VPlanTransforms::runPass(VPlanTransforms::replaceSymbolicStrides, *Plan, PSE,
Legal->getLAI()->getSymbolicStrides());

// Convert memory recipes to strided access recipes if the strided access is
// legal and profitable.
VPlanTransforms::runPass(VPlanTransforms::convertToStridedAccesses, *Plan,
CostCtx, Range);

VPlanTransforms::runPass(VPlanTransforms::legalizeStridedAccess, *Plan,
CostCtx, Range);
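For intuition, these passes target loops whose memory addresses advance by a constant number of elements each iteration, e.g.:

// Without strided-access support the subscript below widens to a gather;
// with it, each vector iteration can use a single strided load instead.
void sum_strided(const int *a, int n, long long *sum) {
  for (int i = 0; i < n; ++i)
    *sum += a[3 * i]; // constant stride of 3 elements
}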

for (ElementCount VF : Range)
Plan->addVF(VF);
Plan->setName("Initial VPlan");

auto BlockNeedsPredication = [this](BasicBlock *BB) {
return Legal->blockNeedsPredication(BB);
};
93 changes: 82 additions & 11 deletions llvm/lib/Transforms/Vectorize/VPlan.h
@@ -563,6 +563,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPInterleaveEVLSC:
case VPRecipeBase::VPInterleaveSC:
case VPRecipeBase::VPIRInstructionSC:
case VPRecipeBase::VPWidenStridedLoadSC:
case VPRecipeBase::VPWidenLoadEVLSC:
case VPRecipeBase::VPWidenLoadSC:
case VPRecipeBase::VPWidenStoreEVLSC:
@@ -1769,10 +1770,6 @@ struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags,
class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags {
Type *SourceElementTy;

bool isPointerLoopInvariant() const {
return getOperand(0)->isDefinedOutsideLoopRegions();
}

bool isIndexLoopInvariant(unsigned I) const {
return getOperand(I + 1)->isDefinedOutsideLoopRegions();
}
@@ -1805,6 +1802,29 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags {
/// This recipe generates a GEP instruction.
unsigned getOpcode() const { return Instruction::GetElementPtr; }

bool isPointerLoopInvariant() const {
return getOperand(0)->isDefinedOutsideLoopRegions();
}

/// If the GEP has exactly one loop-variant index, return its position among
/// the GEP indices (counting from 0); otherwise return std::nullopt.
std::optional<unsigned> getUniqueVariantIndex() const {
std::optional<unsigned> VarIdx;
for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
if (isIndexLoopInvariant(I))
continue;

if (VarIdx)
return std::nullopt;
VarIdx = I;
}
return VarIdx;
}

/// Return the type indexed by the first \p I GEP indices, i.e. the element
/// type that index \p I steps over.
Type *getIndexedType(unsigned I) const {
auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
SmallVector<Value *, 4> Ops(GEP->idx_begin(), GEP->idx_begin() + I);
return GetElementPtrInst::getIndexedType(SourceElementTy, Ops);
}

/// Generate the gep nodes.
void execute(VPTransformState &State) override;

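These helpers let a transform spot a GEP whose only loop-variant input is a single index and recover the type that index steps over; the access's byte stride is then that index's per-iteration step times the indexed type's alloc size. A hypothetical use, with the helper and its parameters assumed rather than taken from the patch:

#include <cstdint>
#include <optional>

#include "VPlan.h" // LoopVectorize-internal header
#include "llvm/IR/DataLayout.h"

using namespace llvm;

// Sketch: byte stride of a GEP with exactly one loop-variant index, given
// that index's constant per-iteration step (e.g. derived from SCEV).
static std::optional<int64_t> getByteStride(const VPWidenGEPRecipe *GEP,
                                            int64_t IndexStep,
                                            const DataLayout &DL) {
  std::optional<unsigned> VarIdx = GEP->getUniqueVariantIndex();
  if (!VarIdx)
    return std::nullopt;
  Type *SteppedTy = GEP->getIndexedType(*VarIdx);
  return IndexStep * (int64_t)DL.getTypeAllocSize(SteppedTy).getFixedValue();
}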
@@ -1895,20 +1915,23 @@ class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags,
#endif
};

/// A recipe to compute the pointers for widened memory accesses of IndexTy.
/// A recipe to compute the pointers for widened memory accesses of
/// SourceElementTy, with the Stride expressed in units of SourceElementTy.
class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
public VPUnrollPartAccessor<1> {
public VPUnrollPartAccessor<2> {
Type *SourceElementTy;

public:
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride,
GEPNoWrapFlags GEPFlags, DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
GEPFlags, DL),
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC,
ArrayRef<VPValue *>({Ptr, Stride}), GEPFlags, DL),
SourceElementTy(SourceElementTy) {}

VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)

/// Return the stride operand.
VPValue *getStride() const { return getOperand(1); }

void execute(VPTransformState &State) override;

Type *getSourceElementType() const { return SourceElementTy; }
@@ -1929,7 +1952,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,

VPVectorPointerRecipe *clone() override {
return new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
getGEPNoWrapFlags(), getDebugLoc());
getStride(), getGEPNoWrapFlags(),
getDebugLoc());
}

/// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that
@@ -3186,7 +3210,8 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC ||
R->getVPDefID() == VPRecipeBase::VPWidenStridedLoadSC;
}

static inline bool classof(const VPUser *U) {
@@ -3307,6 +3332,52 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
}
};

/// A recipe for strided load operations, using the base address, stride, and
/// an optional mask. This recipe generates a vp.strided.load intrinsic call
/// to represent memory accesses with a fixed stride.
struct VPWidenStridedLoadRecipe final : public VPWidenMemoryRecipe,
public VPValue {
VPWidenStridedLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Stride,
VPValue *VF, VPValue *Mask,
const VPIRMetadata &Metadata, DebugLoc DL)
: VPWidenMemoryRecipe(
VPDef::VPWidenStridedLoadSC, Load, {Addr, Stride, VF},
/*Consecutive=*/false, /*Reverse=*/false, Metadata, DL),
VPValue(this, &Load) {
setMask(Mask);
}

VPWidenStridedLoadRecipe *clone() override {
return new VPWidenStridedLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
getStride(), getVF(), getMask(), *this,
getDebugLoc());
}

VP_CLASSOF_IMPL(VPDef::VPWidenStridedLoadSC)

/// Return the stride operand.
VPValue *getStride() const { return getOperand(1); }

/// Return the VF operand.
VPValue *getVF() const { return getOperand(2); }

/// Generate a strided load.
void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif

/// Returns true if the recipe only uses the first lane of operand \p Op.
bool onlyFirstLaneUsed(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return Op == getAddr() || Op == getStride() || Op == getVF();
}
};
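For reference, the VP strided-load intrinsic takes a base pointer, a byte stride, a mask, and an explicit vector length. A minimal sketch of the kind of call execute() would build; the operand preparation here is assumed, not copied from the patch:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// Sketch: emit llvm.experimental.vp.strided.load, which loads EVL lanes
// starting at Addr, each StrideInBytes apart. Note the intrinsic's stride
// operand is in bytes, not in elements.
static Value *emitStridedLoad(IRBuilderBase &B, Type *VecTy, Value *Addr,
                              Value *StrideInBytes, Value *Mask, Value *EVL) {
  return B.CreateIntrinsic(VecTy, Intrinsic::experimental_vp_strided_load,
                           {Addr, StrideInBytes, Mask, EVL});
}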

/// A recipe for widening store operations, using the stored value, the address
/// to store to and an optional mask.
struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
6 changes: 4 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -188,8 +188,10 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
"Store recipes should not define any values");
assert(
(isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe, VPWidenStridedLoadRecipe>(
R)) &&
"Store recipes should not define any values");
return cast<LoadInst>(&R->getIngredient())->getType();
}
