Skip to content

Commit f3c023f

Browse files
committed
New VPWidenStridedLoadRecipe
1 parent db389bd commit f3c023f

File tree

7 files changed

+122
-13
lines changed

7 files changed

+122
-13
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4183,7 +4183,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
41834183
[](const auto *R) { return Instruction::Select; })
41844184
.Case<VPWidenStoreRecipe>(
41854185
[](const auto *R) { return Instruction::Store; })
4186-
.Case<VPWidenLoadRecipe>(
4186+
.Case<VPWidenLoadRecipe, VPWidenStridedLoadRecipe>(
41874187
[](const auto *R) { return Instruction::Load; })
41884188
.Case<VPWidenCallRecipe, VPWidenIntrinsicRecipe>(
41894189
[](const auto *R) { return Instruction::Call; })
@@ -4282,6 +4282,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
42824282
case VPDef::VPWidenPointerInductionSC:
42834283
case VPDef::VPReductionPHISC:
42844284
case VPDef::VPInterleaveSC:
4285+
case VPDef::VPWidenStridedLoadSC:
42854286
case VPDef::VPWidenLoadEVLSC:
42864287
case VPDef::VPWidenLoadSC:
42874288
case VPDef::VPWidenStoreEVLSC:

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
557557
case VPRecipeBase::VPBranchOnMaskSC:
558558
case VPRecipeBase::VPInterleaveSC:
559559
case VPRecipeBase::VPIRInstructionSC:
560+
case VPRecipeBase::VPWidenStridedLoadSC:
560561
case VPRecipeBase::VPWidenLoadEVLSC:
561562
case VPRecipeBase::VPWidenLoadSC:
562563
case VPRecipeBase::VPWidenStoreEVLSC:
@@ -2928,7 +2929,8 @@ class VPWidenMemoryRecipe : public VPRecipeBase, public VPIRMetadata {
29282929
return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
29292930
R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
29302931
R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
2931-
R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
2932+
R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC ||
2933+
R->getVPDefID() == VPRecipeBase::VPWidenStridedLoadSC;
29322934
}
29332935

29342936
static inline bool classof(const VPUser *U) {
@@ -3047,6 +3049,52 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
30473049
}
30483050
};
30493051

3052+
/// A recipe for strided load operations, using the base address, stride, and an
3053+
/// optional mask. This recipe will generate a vp.strided.load intrinsic call
3054+
/// to represent memory accesses with a fixed stride.
3055+
struct VPWidenStridedLoadRecipe final : public VPWidenMemoryRecipe,
3056+
public VPValue {
3057+
VPWidenStridedLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Stride,
3058+
VPValue *VF, VPValue *Mask,
3059+
const VPIRMetadata &Metadata, DebugLoc DL)
3060+
: VPWidenMemoryRecipe(
3061+
VPDef::VPWidenStridedLoadSC, Load, {Addr, Stride, VF},
3062+
/*Consecutive=*/false, /*Reverse=*/false, Metadata, DL),
3063+
VPValue(this, &Load) {
3064+
setMask(Mask);
3065+
}
3066+
3067+
VPWidenStridedLoadRecipe *clone() override {
3068+
return new VPWidenStridedLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
3069+
getStride(), getVF(), getMask(), *this,
3070+
getDebugLoc());
3071+
}
3072+
3073+
VP_CLASSOF_IMPL(VPDef::VPWidenStridedLoadSC);
3074+
3075+
/// Return the stride operand.
3076+
VPValue *getStride() const { return getOperand(1); }
3077+
3078+
/// Return the VF operand.
3079+
VPValue *getVF() const { return getOperand(2); }
3080+
3081+
/// Generate a strided load.
3082+
void execute(VPTransformState &State) override;
3083+
3084+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3085+
/// Print the recipe.
3086+
void print(raw_ostream &O, const Twine &Indent,
3087+
VPSlotTracker &SlotTracker) const override;
3088+
#endif
3089+
3090+
/// Returns true if the recipe only uses the first lane of operand \p Op.
3091+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
3092+
assert(is_contained(operands(), Op) &&
3093+
"Op must be an operand of the recipe");
3094+
return Op == getAddr() || Op == getStride() || Op == getVF();
3095+
}
3096+
};
3097+
30503098
/// A recipe for widening store operations, using the stored value, the address
30513099
/// to store to and an optional mask.
30523100
struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,10 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
184184
}
185185

186186
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
187-
assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
188-
"Store recipes should not define any values");
187+
assert(
188+
(isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe, VPWidenStridedLoadRecipe>(
189+
R)) &&
190+
"Store recipes should not define any values");
189191
return cast<LoadInst>(&R->getIngredient())->getType();
190192
}
191193

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
8080
case VPWidenCastSC:
8181
case VPWidenGEPSC:
8282
case VPWidenIntOrFpInductionSC:
83+
case VPWidenStridedLoadSC:
8384
case VPWidenLoadEVLSC:
8485
case VPWidenLoadSC:
8586
case VPWidenPHISC:
@@ -103,6 +104,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
103104
return cast<VPExpressionRecipe>(this)->mayReadOrWriteMemory();
104105
case VPInstructionSC:
105106
return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
107+
case VPWidenStridedLoadSC:
106108
case VPWidenLoadEVLSC:
107109
case VPWidenLoadSC:
108110
return true;
@@ -184,6 +186,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
184186
}
185187
case VPInterleaveSC:
186188
return mayWriteToMemory();
189+
case VPWidenStridedLoadSC:
187190
case VPWidenLoadEVLSC:
188191
case VPWidenLoadSC:
189192
case VPWidenStoreEVLSC:
@@ -3063,9 +3066,11 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
30633066
getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
30643067
unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
30653068
->getAddressSpace();
3066-
unsigned Opcode = isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(this)
3067-
? Instruction::Load
3068-
: Instruction::Store;
3069+
unsigned Opcode =
3070+
isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe, VPWidenStridedLoadRecipe>(
3071+
this)
3072+
? Instruction::Load
3073+
: Instruction::Store;
30693074

30703075
if (!Consecutive) {
30713076
// TODO: Using the original IR may not be accurate.
@@ -3074,6 +3079,11 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
30743079
const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
30753080
assert(!Reverse &&
30763081
"Inconsecutive memory access should not have the order.");
3082+
3083+
if (isa<VPWidenStridedLoadRecipe>(this))
3084+
return Ctx.TTI.getStridedMemoryOpCost(
3085+
Opcode, Ty, Ptr, IsMasked, Alignment, Ctx.CostKind, &Ingredient);
3086+
30773087
return Ctx.TTI.getAddressComputationCost(Ty) +
30783088
Ctx.TTI.getGatherScatterOpCost(Opcode, Ty, Ptr, IsMasked, Alignment,
30793089
Ctx.CostKind, &Ingredient);
@@ -3224,6 +3234,50 @@ void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
32243234
}
32253235
#endif
32263236

3237+
// Emit a call to the experimental_vp_strided_load intrinsic for this recipe
// and register the resulting call as the recipe's value in State.
void VPWidenStridedLoadRecipe::execute(VPTransformState &State) {
3238+
// Element type and alignment come from the original load ingredient; the
// result type is a vector of that element type with State.VF elements.
Type *ScalarDataTy = getLoadStoreType(&Ingredient);
3239+
auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
3240+
const Align Alignment = getLoadStoreAlignment(&Ingredient);
3241+
3242+
auto &Builder = State.Builder;
3243+
// The address and stride are requested as scalar values.
Value *Addr = State.get(getAddr(), /*IsScalar*/ true);
3244+
Value *Stride = State.get(getStride(), /*IsScalar*/ true);
3245+
// The intrinsic is always given a mask: the recipe's own mask when present,
// otherwise an all-true splat of State.VF elements.
Value *Mask = nullptr;
3246+
if (VPValue *VPMask = getMask())
3247+
Mask = State.get(VPMask);
3248+
else
3249+
Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
3250+
// Lane 0 of the VF operand, zero-extended or truncated to i32; it is passed
// as the last argument of the intrinsic call below.
Value *RunTimeVF = Builder.CreateZExtOrTrunc(State.get(getVF(), VPLane(0)),
3251+
Builder.getInt32Ty());
3252+
3253+
auto *PtrTy = Addr->getType();
3254+
auto *StrideTy = Stride->getType();
3255+
const DataLayout &DL = Ingredient.getDataLayout();
3256+
// Multiply the stride by the allocation size of the scalar element type; the
// product is what the intrinsic receives as its stride argument.
Value *StrideInBytes = Builder.CreateMul(
3257+
Stride, ConstantInt::get(StrideTy, DL.getTypeAllocSize(ScalarDataTy)));
3258+
// The intrinsic is overloaded on the result vector, pointer and stride types.
CallInst *NewLI = Builder.CreateIntrinsic(
3259+
Intrinsic::experimental_vp_strided_load, {DataTy, PtrTy, StrideTy},
3260+
{Addr, StrideInBytes, Mask, RunTimeVF}, nullptr, "wide.strided.load");
3261+
// Attach the original load's alignment to parameter 0 (the address argument).
NewLI->addParamAttr(
3262+
0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
3263+
// NOTE(review): presumably copies the recipe's IR metadata onto the new
// call -- confirm against applyMetadata().
applyMetadata(*NewLI);
3264+
State.set(this, NewLI);
3265+
}
3266+
3267+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3268+
// Print the recipe to O as
//   <Indent>WIDEN <result> = load <addr>, stride = <stride>, runtimeVF = <vf>
// NOTE(review): the mask operand, if any, is not printed here.
void VPWidenStridedLoadRecipe::print(raw_ostream &O, const Twine &Indent,
3269+
VPSlotTracker &SlotTracker) const {
3270+
O << Indent << "WIDEN ";
3271+
// The value defined by this recipe.
printAsOperand(O, SlotTracker);
3272+
O << " = load ";
3273+
getAddr()->printAsOperand(O, SlotTracker);
3274+
O << ", stride = ";
3275+
getStride()->printAsOperand(O, SlotTracker);
3276+
O << ", runtimeVF = ";
3277+
getVF()->printAsOperand(O, SlotTracker);
3278+
}
3279+
#endif
3280+
32273281
void VPWidenStoreRecipe::execute(VPTransformState &State) {
32283282
VPValue *StoredVPValue = getStoredValue();
32293283
bool CreateScatter = !isConsecutive();

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2176,10 +2176,12 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
21762176
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
21772177
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
21782178

2179-
assert(all_of(Plan.getVF().users(),
2180-
IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe,
2181-
VPWidenIntOrFpInductionRecipe>) &&
2182-
"User of VF that we can't transform to EVL.");
2179+
assert(
2180+
all_of(
2181+
Plan.getVF().users(),
2182+
IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe,
2183+
VPWidenIntOrFpInductionRecipe, VPWidenStridedLoadRecipe>) &&
2184+
"User of VF that we can't transform to EVL.");
21832185
Plan.getVF().replaceAllUsesWith(&EVL);
21842186

21852187
// Defer erasing recipes till the end so that we don't invalidate the
@@ -2242,7 +2244,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
22422244
NumDefVal <= 1 &&
22432245
"Only supports recipes with a single definition or without users.");
22442246
EVLRecipe->insertBefore(CurRecipe);
2245-
if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe>(EVLRecipe)) {
2247+
if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe,
2248+
VPWidenStridedLoadRecipe>(EVLRecipe)) {
22462249
VPValue *CurVPV = CurRecipe->getVPSingleValue();
22472250
CurVPV->replaceAllUsesWith(EVLRecipe->getVPSingleValue());
22482251
}

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ class VPDef {
347347
VPWidenCastSC,
348348
VPWidenGEPSC,
349349
VPWidenIntrinsicSC,
350+
VPWidenStridedLoadSC,
350351
VPWidenLoadEVLSC,
351352
VPWidenLoadSC,
352353
VPWidenStoreEVLSC,

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
157157
return VerifyEVLUse(*S, S->getNumOperands() - 1);
158158
})
159159
.Case<VPWidenStoreEVLRecipe, VPReductionEVLRecipe,
160-
VPWidenIntOrFpInductionRecipe>(
160+
VPWidenIntOrFpInductionRecipe, VPWidenStridedLoadRecipe>(
161161
[&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); })
162162
.Case<VPScalarIVStepsRecipe>([&](auto *R) {
163163
if (R->getNumOperands() != 3) {

0 commit comments

Comments
 (0)