Skip to content

Commit 13357e8

Browse files
authored
[LV][EVL] Support interleaved access with tail folding by EVL (#152070)
The InterleavedAccess pass already supports transforming vector-predicated (vp) load/store intrinsics. With this patch, we start enabling interleaved access under tail folding by EVL. This patch introduces a new base class, VPInterleaveBase, and a concrete class, VPInterleaveEVLRecipe. Both the existing VPInterleaveRecipe and the new VPInterleaveEVLRecipe inherit from and implement VPInterleaveBase. Compared to VPInterleaveRecipe, VPInterleaveEVLRecipe adds an EVL operand to emit vp.load/vp.store intrinsics. Currently, tail folding by EVL is only supported for scalable vectorization. Therefore, VPInterleaveEVLRecipe will only emit interleave/deinterleave intrinsics. Reverse accesses are not yet implemented, as masked reverse interleaved access under tail folding is not yet supported. Fixed #123201
1 parent eb7f6a5 commit 13357e8

File tree

11 files changed

+430
-295
lines changed

11 files changed

+430
-295
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4088,6 +4088,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
40884088
case VPDef::VPWidenIntOrFpInductionSC:
40894089
case VPDef::VPWidenPointerInductionSC:
40904090
case VPDef::VPReductionPHISC:
4091+
case VPDef::VPInterleaveEVLSC:
40914092
case VPDef::VPInterleaveSC:
40924093
case VPDef::VPWidenLoadEVLSC:
40934094
case VPDef::VPWidenLoadSC:
@@ -4116,8 +4117,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
41164117

41174118
// If the recipe defines no value and is not a store (e.g., a branch),
// continue - there is no value to check.
41184119
if (R.getNumDefinedValues() == 0 &&
4119-
!isa<VPWidenStoreRecipe, VPWidenStoreEVLRecipe, VPInterleaveRecipe>(
4120-
&R))
4120+
!isa<VPWidenStoreRecipe, VPWidenStoreEVLRecipe, VPInterleaveBase>(&R))
41214121
continue;
41224122
// For multi-def recipes, currently only interleaved loads, it suffices to
41234123
// check the first def only.

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 125 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
560560
case VPRecipeBase::VPPartialReductionSC:
561561
return true;
562562
case VPRecipeBase::VPBranchOnMaskSC:
563+
case VPRecipeBase::VPInterleaveEVLSC:
563564
case VPRecipeBase::VPInterleaveSC:
564565
case VPRecipeBase::VPIRInstructionSC:
565566
case VPRecipeBase::VPWidenLoadEVLSC:
@@ -2445,12 +2446,13 @@ class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe {
24452446
}
24462447
};
24472448

2448-
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2449-
/// or stores into one wide load/store and shuffles. The first operand of a
2450-
/// VPInterleave recipe is the address, followed by the stored values, followed
2451-
/// by an optional mask.
2452-
class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase,
2453-
public VPIRMetadata {
2449+
/// A common base class for interleaved memory operations.
2450+
/// An interleaved memory operation is a memory access method that combines
2451+
/// multiple strided loads/stores into a single wide load/store with shuffles.
2452+
/// The first operand is the start address. The optional operands are, in order,
2453+
/// the stored values and the mask.
2454+
class LLVM_ABI_FOR_TEST VPInterleaveBase : public VPRecipeBase,
2455+
public VPIRMetadata {
24542456
const InterleaveGroup<Instruction> *IG;
24552457

24562458
/// Indicates if the interleave group is in a conditional block and requires a
@@ -2461,12 +2463,14 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase,
24612463
/// unused gaps can be loaded speculatively.
24622464
bool NeedsMaskForGaps = false;
24632465

2464-
public:
2465-
VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
2466-
ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2467-
bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2468-
: VPRecipeBase(VPDef::VPInterleaveSC, {Addr}, DL), VPIRMetadata(MD),
2469-
IG(IG), NeedsMaskForGaps(NeedsMaskForGaps) {
2466+
protected:
2467+
VPInterleaveBase(const unsigned char SC,
2468+
const InterleaveGroup<Instruction> *IG,
2469+
ArrayRef<VPValue *> Operands,
2470+
ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2471+
bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2472+
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2473+
NeedsMaskForGaps(NeedsMaskForGaps) {
24702474
// TODO: extend the masked interleaved-group support to reversed access.
24712475
assert((!Mask || !IG->isReverse()) &&
24722476
"Reversed masked interleave-group not supported.");
@@ -2484,14 +2488,19 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase,
24842488
addOperand(Mask);
24852489
}
24862490
}
2487-
~VPInterleaveRecipe() override = default;
24882491

2489-
VPInterleaveRecipe *clone() override {
2490-
return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2491-
NeedsMaskForGaps, *this, getDebugLoc());
2492+
public:
2493+
VPInterleaveBase *clone() override = 0;
2494+
2495+
static inline bool classof(const VPRecipeBase *R) {
2496+
return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
2497+
R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
24922498
}
24932499

2494-
VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2500+
static inline bool classof(const VPUser *U) {
2501+
auto *R = dyn_cast<VPRecipeBase>(U);
2502+
return R && classof(R);
2503+
}
24952504

24962505
/// Return the address accessed by this recipe.
24972506
VPValue *getAddr() const {
@@ -2501,48 +2510,130 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase,
25012510
/// Return the mask used by this recipe. Note that a full mask is represented
25022511
/// by a nullptr.
25032512
VPValue *getMask() const {
2504-
// Mask is optional and therefore the last, currently 2nd operand.
2513+
// The mask is optional; when present, it is always the last operand.
25052514
return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
25062515
}
25072516

2517+
/// Return true if the access needs a mask because of the gaps.
2518+
bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2519+
2520+
const InterleaveGroup<Instruction> *getInterleaveGroup() const { return IG; }
2521+
2522+
Instruction *getInsertPos() const { return IG->getInsertPos(); }
2523+
2524+
void execute(VPTransformState &State) override {
2525+
llvm_unreachable("VPInterleaveBase should not be instantiated.");
2526+
}
2527+
2528+
/// Return the cost of this recipe.
2529+
InstructionCost computeCost(ElementCount VF,
2530+
VPCostContext &Ctx) const override;
2531+
2532+
/// Returns true if the recipe only uses the first lane of operand \p Op.
2533+
virtual bool onlyFirstLaneUsed(const VPValue *Op) const override = 0;
2534+
2535+
/// Returns the number of stored operands of this interleave group. Returns 0
2536+
/// for load interleave groups.
2537+
virtual unsigned getNumStoreOperands() const = 0;
2538+
25082539
/// Return the VPValues stored by this interleave group. If it is a load
25092540
/// interleave group, return an empty ArrayRef.
25102541
ArrayRef<VPValue *> getStoredValues() const {
2511-
// The first operand is the address, followed by the stored values, followed
2512-
// by an optional mask.
2513-
return ArrayRef<VPValue *>(op_begin(), getNumOperands())
2514-
.slice(1, getNumStoreOperands());
2542+
return ArrayRef<VPValue *>(op_end() -
2543+
(getNumStoreOperands() + (HasMask ? 1 : 0)),
2544+
getNumStoreOperands());
25152545
}
2546+
};
2547+
2548+
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2549+
/// or stores into one wide load/store and shuffles. The first operand of a
2550+
/// VPInterleave recipe is the address, followed by the stored values, followed
2551+
/// by an optional mask.
2552+
class LLVM_ABI_FOR_TEST VPInterleaveRecipe final : public VPInterleaveBase {
2553+
public:
2554+
VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
2555+
ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2556+
bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2557+
: VPInterleaveBase(VPDef::VPInterleaveSC, IG, Addr, StoredValues, Mask,
2558+
NeedsMaskForGaps, MD, DL) {}
2559+
2560+
~VPInterleaveRecipe() override = default;
2561+
2562+
VPInterleaveRecipe *clone() override {
2563+
return new VPInterleaveRecipe(getInterleaveGroup(), getAddr(),
2564+
getStoredValues(), getMask(),
2565+
needsMaskForGaps(), *this, getDebugLoc());
2566+
}
2567+
2568+
VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
25162569

25172570
/// Generate the wide load or store, and shuffles.
25182571
void execute(VPTransformState &State) override;
25192572

2520-
/// Return the cost of this VPInterleaveRecipe.
2521-
InstructionCost computeCost(ElementCount VF,
2522-
VPCostContext &Ctx) const override;
2523-
25242573
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
25252574
/// Print the recipe.
25262575
void print(raw_ostream &O, const Twine &Indent,
25272576
VPSlotTracker &SlotTracker) const override;
25282577
#endif
25292578

2530-
const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
2579+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2580+
assert(is_contained(operands(), Op) &&
2581+
"Op must be an operand of the recipe");
2582+
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2583+
}
25312584

2532-
/// Returns the number of stored operands of this interleave group. Returns 0
2533-
/// for load interleave groups.
2534-
unsigned getNumStoreOperands() const {
2535-
return getNumOperands() - (HasMask ? 2 : 1);
2585+
unsigned getNumStoreOperands() const override {
2586+
return getNumOperands() - (getMask() ? 2 : 1);
2587+
}
2588+
};
2589+
2590+
/// A recipe for interleaved memory operations with vector-predication
2591+
/// intrinsics. The first operand is the address, the second operand is the
2592+
/// explicit vector length. Stored values and mask are optional operands.
2593+
class LLVM_ABI_FOR_TEST VPInterleaveEVLRecipe final : public VPInterleaveBase {
2594+
public:
2595+
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
2596+
: VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2597+
ArrayRef<VPValue *>({R.getAddr(), &EVL}),
2598+
R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2599+
R.getDebugLoc()) {
2600+
assert(!getInterleaveGroup()->isReverse() &&
2601+
"Reversed interleave-group with tail folding is not supported.");
2602+
assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2603+
"supported for scalable vector.");
2604+
}
2605+
2606+
~VPInterleaveEVLRecipe() override = default;
2607+
2608+
VPInterleaveEVLRecipe *clone() override {
2609+
llvm_unreachable("cloning not implemented yet");
25362610
}
25372611

2538-
/// The recipe only uses the first lane of the address.
2612+
VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
2613+
2614+
/// The VPValue of the explicit vector length.
2615+
VPValue *getEVL() const { return getOperand(1); }
2616+
2617+
/// Generate the wide load or store, and shuffles.
2618+
void execute(VPTransformState &State) override;
2619+
2620+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2621+
/// Print the recipe.
2622+
void print(raw_ostream &O, const Twine &Indent,
2623+
VPSlotTracker &SlotTracker) const override;
2624+
#endif
2625+
2626+
/// The recipe only uses the first lane of the address and of the EVL operand.
25392627
bool onlyFirstLaneUsed(const VPValue *Op) const override {
25402628
assert(is_contained(operands(), Op) &&
25412629
"Op must be an operand of the recipe");
2542-
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2630+
return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
2631+
Op == getEVL();
25432632
}
25442633

2545-
Instruction *getInsertPos() const { return IG->getInsertPos(); }
2634+
unsigned getNumStoreOperands() const override {
2635+
return getNumOperands() - (getMask() ? 3 : 2);
2636+
}
25462637
};
25472638

25482639
/// A recipe to represent inloop reduction operations, performing a reduction on

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
296296
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
297297
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
298298
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
299-
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
299+
.Case<VPInterleaveBase>([V](const auto *R) {
300300
// TODO: Use info from interleave group.
301301
return V->getUnderlyingValue()->getType();
302302
})

0 commit comments

Comments
 (0)