Skip to content

Commit a2ca7fd

Browse files
committed
New VPWidenStridedLoadRecipe
1 parent 542703f commit a2ca7fd

File tree

7 files changed

+119
-15
lines changed

7 files changed

+119
-15
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3961,7 +3961,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
39613961
[](const auto *R) { return Instruction::Select; })
39623962
.Case<VPWidenStoreRecipe>(
39633963
[](const auto *R) { return Instruction::Store; })
3964-
.Case<VPWidenLoadRecipe>(
3964+
.Case<VPWidenLoadRecipe, VPWidenStridedLoadRecipe>(
39653965
[](const auto *R) { return Instruction::Load; })
39663966
.Case<VPWidenCallRecipe, VPWidenIntrinsicRecipe>(
39673967
[](const auto *R) { return Instruction::Call; })
@@ -4061,6 +4061,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
40614061
case VPDef::VPReductionPHISC:
40624062
case VPDef::VPInterleaveEVLSC:
40634063
case VPDef::VPInterleaveSC:
4064+
case VPDef::VPWidenStridedLoadSC:
40644065
case VPDef::VPWidenLoadEVLSC:
40654066
case VPDef::VPWidenLoadSC:
40664067
case VPDef::VPWidenStoreEVLSC:

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
559559
case VPRecipeBase::VPInterleaveEVLSC:
560560
case VPRecipeBase::VPInterleaveSC:
561561
case VPRecipeBase::VPIRInstructionSC:
562+
case VPRecipeBase::VPWidenStridedLoadSC:
562563
case VPRecipeBase::VPWidenLoadEVLSC:
563564
case VPRecipeBase::VPWidenLoadSC:
564565
case VPRecipeBase::VPWidenStoreEVLSC:
@@ -3205,7 +3206,8 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
32053206
return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
32063207
R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
32073208
R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
3208-
R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
3209+
R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC ||
3210+
R->getVPDefID() == VPRecipeBase::VPWidenStridedLoadSC;
32093211
}
32103212

32113213
static inline bool classof(const VPUser *U) {
@@ -3326,6 +3328,52 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
33263328
}
33273329
};
33283330

3331+
/// A recipe for strided load operations, using the base address, stride, and an
3332+
/// optional mask. This recipe will generate a vp.strided.load intrinsic call
3333+
/// to represent memory accesses with a fixed stride.
///
/// The operands handed to VPWidenMemoryRecipe are, in order, {Addr, Stride, VF};
/// the mask is attached separately through setMask(). The recipe is always
/// constructed with Consecutive = false and Reverse = false.
3334+
struct VPWidenStridedLoadRecipe final : public VPWidenMemoryRecipe,
3335+
public VPValue {
3336+
/// Create a strided load recipe for the scalar load \p Load, reading from
/// \p Addr with stride \p Stride. \p VF is stored as operand 2 and \p Mask is
/// registered through setMask(). \p Metadata and \p DL are forwarded to the
/// VPWidenMemoryRecipe base.
VPWidenStridedLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Stride,
3337+
VPValue *VF, VPValue *Mask,
3338+
const VPIRMetadata &Metadata, DebugLoc DL)
3339+
: VPWidenMemoryRecipe(
3340+
VPDef::VPWidenStridedLoadSC, Load, {Addr, Stride, VF},
3341+
/*Consecutive=*/false, /*Reverse=*/false, Metadata, DL),
3342+
VPValue(this, &Load) {
3343+
setMask(Mask);
3344+
}
3345+
3346+
/// Create a new recipe from the same ingredient, address, stride, VF, mask
/// and debug location; *this is passed as the metadata argument.
VPWidenStridedLoadRecipe *clone() override {
3347+
return new VPWidenStridedLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
3348+
getStride(), getVF(), getMask(), *this,
3349+
getDebugLoc());
3350+
}
3351+
3352+
VP_CLASSOF_IMPL(VPDef::VPWidenStridedLoadSC);
3353+
3354+
/// Return the stride operand (operand index 1).
3355+
VPValue *getStride() const { return getOperand(1); }
3356+
3357+
/// Return the VF operand (operand index 2).
3358+
VPValue *getVF() const { return getOperand(2); }
3359+
3360+
/// Generate a strided load.
3361+
void execute(VPTransformState &State) override;
3362+
3363+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3364+
/// Print the recipe.
3365+
void print(raw_ostream &O, const Twine &Indent,
3366+
VPSlotTracker &SlotTracker) const override;
3367+
#endif
3368+
3369+
/// Returns true if the recipe only uses the first lane of operand \p Op.
/// That is the case for the address, stride and VF operands; for any other
/// operand (presumably only the mask -- confirm against setMask()) this
/// returns false.
3370+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
3371+
assert(is_contained(operands(), Op) &&
3372+
"Op must be an operand of the recipe");
3373+
return Op == getAddr() || Op == getStride() || Op == getVF();
3374+
}
3375+
};
3376+
33293377
/// A recipe for widening store operations, using the stored value, the address
33303378
/// to store to and an optional mask.
33313379
struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe {

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,10 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
190190
}
191191

192192
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
193-
assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
194-
"Store recipes should not define any values");
193+
assert(
194+
(isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe, VPWidenStridedLoadRecipe>(
195+
R)) &&
196+
"Store recipes should not define any values");
195197
return cast<LoadInst>(&R->getIngredient())->getType();
196198
}
197199

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
8383
case VPWidenCastSC:
8484
case VPWidenGEPSC:
8585
case VPWidenIntOrFpInductionSC:
86+
case VPWidenStridedLoadSC:
8687
case VPWidenLoadEVLSC:
8788
case VPWidenLoadSC:
8889
case VPWidenPHISC:
@@ -106,6 +107,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
106107
return cast<VPExpressionRecipe>(this)->mayReadOrWriteMemory();
107108
case VPInstructionSC:
108109
return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
110+
case VPWidenStridedLoadSC:
109111
case VPWidenLoadEVLSC:
110112
case VPWidenLoadSC:
111113
return true;
@@ -189,6 +191,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
189191
case VPInterleaveEVLSC:
190192
case VPInterleaveSC:
191193
return mayWriteToMemory();
194+
case VPWidenStridedLoadSC:
192195
case VPWidenLoadEVLSC:
193196
case VPWidenLoadSC:
194197
case VPWidenStoreEVLSC:
@@ -3497,9 +3500,11 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
34973500
const Align Alignment = getLoadStoreAlignment(&Ingredient);
34983501
unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
34993502
->getAddressSpace();
3500-
unsigned Opcode = isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(this)
3501-
? Instruction::Load
3502-
: Instruction::Store;
3503+
unsigned Opcode =
3504+
isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe, VPWidenStridedLoadRecipe>(
3505+
this)
3506+
? Instruction::Load
3507+
: Instruction::Store;
35033508

35043509
if (!Consecutive) {
35053510
// TODO: Using the original IR may not be accurate.
@@ -3509,8 +3514,11 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
35093514
"Inconsecutive memory access should not have the order.");
35103515

35113516
const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
3512-
Type *PtrTy = Ptr->getType();
3517+
if (isa<VPWidenStridedLoadRecipe>(this))
3518+
return Ctx.TTI.getStridedMemoryOpCost(
3519+
Opcode, Ty, Ptr, IsMasked, Alignment, Ctx.CostKind, &Ingredient);
35133520

3521+
Type *PtrTy = Ptr->getType();
35143522
// If the address value is uniform across all lanes, then the address can be
35153523
// calculated with scalar type and broadcast.
35163524
if (!vputils::isSingleScalar(getAddr()))
@@ -3665,6 +3673,47 @@ void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
36653673
}
36663674
#endif
36673675

3676+
// Emit a call to the experimental_vp_strided_load intrinsic for this recipe and
// register the call in State as the value this recipe defines.
void VPWidenStridedLoadRecipe::execute(VPTransformState &State) {
3677+
// Result type: a vector of the ingredient's load/store type with State.VF
// elements.
Type *ScalarDataTy = getLoadStoreType(&Ingredient);
3678+
auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
3679+
const Align Alignment = getLoadStoreAlignment(&Ingredient);
3680+
3681+
auto &Builder = State.Builder;
3682+
// Address and stride are both requested as scalar values (IsScalar = true).
Value *Addr = State.get(getAddr(), /*IsScalar*/ true);
3683+
// NOTE(review): the name suggests the stride operand is already expressed in
// bytes when it reaches this recipe -- confirm against the code creating it.
Value *StrideInBytes = State.get(getStride(), /*IsScalar*/ true);
3684+
// The intrinsic call below always receives a mask argument, so when the
// recipe carries no mask an all-true splat of State.VF lanes is used instead.
Value *Mask = nullptr;
3685+
if (VPValue *VPMask = getMask())
3686+
Mask = State.get(VPMask);
3687+
else
3688+
Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
3689+
// Lane 0 of the VF operand, zero-extended or truncated to i32, becomes the
// last argument of the intrinsic call.
Value *RunTimeVF = Builder.CreateZExtOrTrunc(State.get(getVF(), VPLane(0)),
3690+
Builder.getInt32Ty());
3691+
3692+
// The intrinsic is overloaded on the result, pointer and stride types.
auto *PtrTy = Addr->getType();
3693+
auto *StrideTy = StrideInBytes->getType();
3694+
CallInst *NewLI = Builder.CreateIntrinsic(
3695+
Intrinsic::experimental_vp_strided_load, {DataTy, PtrTy, StrideTy},
3696+
{Addr, StrideInBytes, Mask, RunTimeVF}, nullptr, "wide.strided.load");
3697+
// Attach the ingredient's alignment to argument 0, the address.
NewLI->addParamAttr(
3698+
0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
3699+
applyMetadata(*NewLI);
3700+
State.set(this, NewLI);
3701+
}
3702+
3703+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3704+
// Print the recipe to O in the form
//   <Indent>WIDEN <def> = load <addr>, stride = <stride>, runtimeVF = <vf>
// Only the address, stride and VF operands are printed; the mask is not
// printed here.
void VPWidenStridedLoadRecipe::print(raw_ostream &O, const Twine &Indent,
3705+
VPSlotTracker &SlotTracker) const {
3706+
O << Indent << "WIDEN ";
3707+
// The value defined by this recipe.
printAsOperand(O, SlotTracker);
3708+
O << " = load ";
3709+
getAddr()->printAsOperand(O, SlotTracker);
3710+
O << ", stride = ";
3711+
getStride()->printAsOperand(O, SlotTracker);
3712+
O << ", runtimeVF = ";
3713+
getVF()->printAsOperand(O, SlotTracker);
3714+
}
3715+
#endif
3716+
36683717
void VPWidenStoreRecipe::execute(VPTransformState &State) {
36693718
VPValue *StoredVPValue = getStoredValue();
36703719
bool CreateScatter = !isConsecutive();

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2596,10 +2596,12 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
25962596
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
25972597
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
25982598

2599-
assert(all_of(Plan.getVF().users(),
2600-
IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe,
2601-
VPWidenIntOrFpInductionRecipe>) &&
2602-
"User of VF that we can't transform to EVL.");
2599+
assert(
2600+
all_of(
2601+
Plan.getVF().users(),
2602+
IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe,
2603+
VPWidenIntOrFpInductionRecipe, VPWidenStridedLoadRecipe>) &&
2604+
"User of VF that we can't transform to EVL.");
26032605
Plan.getVF().replaceUsesWithIf(&EVL, [](VPUser &U, unsigned Idx) {
26042606
return isa<VPWidenIntOrFpInductionRecipe, VPScalarIVStepsRecipe>(U);
26052607
});
@@ -2695,8 +2697,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
26952697
"New recipe must define the same number of values as the "
26962698
"original.");
26972699
EVLRecipe->insertBefore(CurRecipe);
2698-
if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe, VPInterleaveEVLRecipe>(
2699-
EVLRecipe)) {
2700+
if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe, VPWidenStridedLoadRecipe,
2701+
VPInterleaveEVLRecipe>(EVLRecipe)) {
27002702
for (unsigned I = 0; I < NumDefVal; ++I) {
27012703
VPValue *CurVPV = CurRecipe->getVPValue(I);
27022704
CurVPV->replaceAllUsesWith(EVLRecipe->getVPValue(I));

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,7 @@ class VPDef {
351351
VPWidenCastSC,
352352
VPWidenGEPSC,
353353
VPWidenIntrinsicSC,
354+
VPWidenStridedLoadSC,
354355
VPWidenLoadEVLSC,
355356
VPWidenLoadSC,
356357
VPWidenStoreEVLSC,

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
158158
return VerifyEVLUse(*S, S->getNumOperands() - 1);
159159
})
160160
.Case<VPWidenStoreEVLRecipe, VPReductionEVLRecipe,
161-
VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe>(
161+
VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe,
162+
VPWidenStridedLoadRecipe>(
162163
[&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); })
163164
.Case<VPScalarIVStepsRecipe>([&](auto *R) {
164165
if (R->getNumOperands() != 3) {

0 commit comments

Comments
 (0)