Skip to content

Commit 1f680a5

Browse files
committed
Support EVL interleave access
1 parent 6ca6d45 commit 1f680a5

File tree

11 files changed

+402
-290
lines changed

11 files changed

+402
-290
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4170,6 +4170,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
41704170
case VPDef::VPWidenIntOrFpInductionSC:
41714171
case VPDef::VPWidenPointerInductionSC:
41724172
case VPDef::VPReductionPHISC:
4173+
case VPDef::VPInterleaveEVLSC:
41734174
case VPDef::VPInterleaveSC:
41744175
case VPDef::VPWidenLoadEVLSC:
41754176
case VPDef::VPWidenLoadSC:
@@ -4198,8 +4199,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
41984199

41994200
// If the recipe defines no value and is not a store, e.g., a branch, continue - there is no value to check.
42004201
if (R.getNumDefinedValues() == 0 &&
4201-
!isa<VPWidenStoreRecipe, VPWidenStoreEVLRecipe, VPInterleaveRecipe>(
4202-
&R))
4202+
!isa<VPWidenStoreRecipe, VPWidenStoreEVLRecipe, VPInterleaveBase>(&R))
42034203
continue;
42044204
// For multi-def recipes, currently only interleaved loads, it suffices to
42054205
// check the first def only.

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 137 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
557557
case VPRecipeBase::VPPartialReductionSC:
558558
return true;
559559
case VPRecipeBase::VPBranchOnMaskSC:
560+
case VPRecipeBase::VPInterleaveEVLSC:
560561
case VPRecipeBase::VPInterleaveSC:
561562
case VPRecipeBase::VPIRInstructionSC:
562563
case VPRecipeBase::VPWidenLoadEVLSC:
@@ -2385,11 +2386,14 @@ class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe {
23852386
}
23862387
};
23872388

2388-
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2389-
/// or stores into one wide load/store and shuffles. The first operand of a
2390-
/// VPInterleave recipe is the address, followed by the stored values, followed
2391-
/// by an optional mask.
2392-
class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase {
2389+
/// A common base class for interleaved memory operations.
2390+
/// An interleaved memory operation is a memory access method that combines
2391+
/// multiple strided loads/stores into a single wide load/store with shuffles.
2392+
/// The first operand must be the address. The optional operands are, in order,
2393+
/// the stored values and the mask.
2394+
/// TODO: Inherit from VPIRMetadata
2395+
class LLVM_ABI_FOR_TEST VPInterleaveBase : public VPRecipeBase {
2396+
protected:
23932397
const InterleaveGroup<Instruction> *IG;
23942398

23952399
/// Indicates if the interleave group is in a conditional block and requires a
@@ -2400,14 +2404,13 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase {
24002404
/// unused gaps can be loaded speculatively.
24012405
bool NeedsMaskForGaps = false;
24022406

2403-
public:
2404-
VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
2405-
ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2406-
bool NeedsMaskForGaps, DebugLoc DL)
2407-
: VPRecipeBase(VPDef::VPInterleaveSC, {Addr},
2408-
DL),
2409-
2410-
IG(IG), NeedsMaskForGaps(NeedsMaskForGaps) {
2407+
VPInterleaveBase(const unsigned char SC,
2408+
const InterleaveGroup<Instruction> *IG,
2409+
ArrayRef<VPValue *> Operands,
2410+
ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2411+
bool NeedsMaskForGaps, DebugLoc DL)
2412+
: VPRecipeBase(SC, Operands, DL), IG(IG),
2413+
NeedsMaskForGaps(NeedsMaskForGaps) {
24112414
// TODO: extend the masked interleaved-group support to reversed access.
24122415
assert((!Mask || !IG->isReverse()) &&
24132416
"Reversed masked interleave-group not supported.");
@@ -2420,70 +2423,169 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase {
24202423

24212424
for (auto *SV : StoredValues)
24222425
addOperand(SV);
2426+
24232427
if (Mask) {
24242428
HasMask = true;
24252429
addOperand(Mask);
24262430
}
24272431
}
2428-
~VPInterleaveRecipe() override = default;
24292432

2430-
VPInterleaveRecipe *clone() override {
2431-
return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2432-
NeedsMaskForGaps, getDebugLoc());
2433+
public:
2434+
VPInterleaveBase *clone() override {
2435+
llvm_unreachable("cloning not supported");
24332436
}
24342437

2435-
VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2438+
static inline bool classof(const VPRecipeBase *R) {
2439+
return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
2440+
R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
2441+
}
2442+
2443+
static inline bool classof(const VPUser *U) {
2444+
auto *R = dyn_cast<VPRecipeBase>(U);
2445+
return R && classof(R);
2446+
}
24362447

24372448
/// Return the address accessed by this recipe.
24382449
VPValue *getAddr() const {
24392450
return getOperand(0); // Address is the 1st, mandatory operand.
24402451
}
24412452

2453+
/// Return true if the access needs a mask because of the gaps.
2454+
bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2455+
24422456
/// Return the mask used by this recipe. Note that a full mask is represented
24432457
/// by a nullptr.
24442458
VPValue *getMask() const {
2445-
// Mask is optional and therefore the last, currently 2nd operand.
2459+
// Mask is optional and the last operand.
24462460
return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
24472461
}
24482462

2463+
const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
2464+
2465+
Instruction *getInsertPos() const { return IG->getInsertPos(); }
2466+
2467+
void execute(VPTransformState &State) override {
2468+
llvm_unreachable("VPInterleaveBase should not be instantiated.");
2469+
}
2470+
2471+
/// Return the cost of this interleave recipe.
2472+
InstructionCost computeCost(ElementCount VF,
2473+
VPCostContext &Ctx) const override;
2474+
2475+
/// Returns true if the recipe only uses the first lane of operand \p Op.
2476+
virtual bool onlyFirstLaneUsed(const VPValue *Op) const = 0;
2477+
2478+
/// Returns the number of stored operands of this interleave group. Returns 0
2479+
/// for load interleave groups.
2480+
virtual unsigned getNumStoreOperands() const = 0;
2481+
24492482
/// Return the VPValues stored by this interleave group. If it is a load
24502483
/// interleave group, return an empty ArrayRef.
2451-
ArrayRef<VPValue *> getStoredValues() const {
2452-
// The first operand is the address, followed by the stored values, followed
2453-
// by an optional mask.
2454-
return ArrayRef<VPValue *>(op_begin(), getNumOperands())
2455-
.slice(1, getNumStoreOperands());
2484+
virtual ArrayRef<VPValue *> getStoredValues() const = 0;
2485+
};
2486+
2487+
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2488+
/// or stores into one wide load/store and shuffles. The first operand of a
2489+
/// VPInterleave recipe is the address, followed by the stored values, followed
2490+
/// by an optional mask.
2491+
class LLVM_ABI_FOR_TEST VPInterleaveRecipe final : public VPInterleaveBase {
2492+
public:
2493+
VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
2494+
ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2495+
bool NeedsMaskForGaps, DebugLoc DL)
2496+
: VPInterleaveBase(VPDef::VPInterleaveSC, IG, ArrayRef<VPValue *>({Addr}),
2497+
StoredValues, Mask, NeedsMaskForGaps, DL) {}
2498+
2499+
~VPInterleaveRecipe() override = default;
2500+
2501+
VPInterleaveRecipe *clone() override {
2502+
return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2503+
NeedsMaskForGaps, getDebugLoc());
24562504
}
24572505

2506+
VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2507+
24582508
/// Generate the wide load or store, and shuffles.
24592509
void execute(VPTransformState &State) override;
24602510

2461-
/// Return the cost of this VPInterleaveRecipe.
2462-
InstructionCost computeCost(ElementCount VF,
2463-
VPCostContext &Ctx) const override;
2464-
24652511
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
24662512
/// Print the recipe.
24672513
void print(raw_ostream &O, const Twine &Indent,
24682514
VPSlotTracker &SlotTracker) const override;
24692515
#endif
24702516

2471-
const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
2517+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2518+
assert(is_contained(operands(), Op) &&
2519+
"Op must be an operand of the recipe");
2520+
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2521+
}
24722522

2473-
/// Returns the number of stored operands of this interleave group. Returns 0
2474-
/// for load interleave groups.
2475-
unsigned getNumStoreOperands() const {
2523+
unsigned getNumStoreOperands() const override {
24762524
return getNumOperands() - (HasMask ? 2 : 1);
24772525
}
24782526

2479-
/// The recipe only uses the first lane of the address.
2527+
ArrayRef<VPValue *> getStoredValues() const override {
2528+
// The first operand is the address, followed by the stored values, followed
2529+
// by an optional mask.
2530+
return ArrayRef<VPValue *>(op_begin(), getNumOperands())
2531+
.slice(1, getNumStoreOperands());
2532+
}
2533+
};
2534+
2535+
/// A recipe for interleaved access operations with vector-predication
2536+
/// intrinsics. The first operand is the address, the second operand is the
2537+
/// explicit vector length. Stored values and mask are optional operands.
2538+
class LLVM_ABI_FOR_TEST VPInterleaveEVLRecipe final : public VPInterleaveBase {
2539+
public:
2540+
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
2541+
: VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2542+
ArrayRef<VPValue *>({R.getAddr(), &EVL}),
2543+
R.getStoredValues(), Mask, R.needsMaskForGaps(),
2544+
R.getDebugLoc()) {
2545+
assert(!IG->isReverse() &&
2546+
"Reversed interleave-group with tail folding is not supported.");
2547+
assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2548+
"supported for scalable vector.");
2549+
}
2550+
2551+
~VPInterleaveEVLRecipe() override = default;
2552+
2553+
VPInterleaveEVLRecipe *clone() override {
2554+
llvm_unreachable("cloning not implemented yet");
2555+
}
2556+
2557+
VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
2558+
2559+
/// The VPValue of the explicit vector length.
2560+
VPValue *getEVL() const { return getOperand(1); }
2561+
2562+
/// Generate the wide load or store, and shuffles.
2563+
void execute(VPTransformState &State) override;
2564+
2565+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2566+
/// Print the recipe.
2567+
void print(raw_ostream &O, const Twine &Indent,
2568+
VPSlotTracker &SlotTracker) const override;
2569+
#endif
2570+
2571+
/// The recipe only uses the first lane of the address and of the EVL operand.
24802572
bool onlyFirstLaneUsed(const VPValue *Op) const override {
24812573
assert(is_contained(operands(), Op) &&
24822574
"Op must be an operand of the recipe");
2483-
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2575+
return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
2576+
Op == getEVL();
24842577
}
24852578

2486-
Instruction *getInsertPos() const { return IG->getInsertPos(); }
2579+
unsigned getNumStoreOperands() const override {
2580+
return getNumOperands() - (HasMask ? 3 : 2);
2581+
}
2582+
2583+
ArrayRef<VPValue *> getStoredValues() const override {
2584+
// The first operand is the address, and the second operand is EVL, followed
2585+
// by the stored values, followed by an optional mask.
2586+
return ArrayRef<VPValue *>(op_begin(), getNumOperands())
2587+
.slice(2, getNumStoreOperands());
2588+
}
24872589
};
24882590

24892591
/// A recipe to represent inloop reduction operations, performing a reduction on

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
297297
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
298298
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
299299
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
300-
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
300+
.Case<VPInterleaveRecipe, VPInterleaveEVLRecipe>([V](const auto *R) {
301301
// TODO: Use info from interleave group.
302302
return V->getUnderlyingValue()->getType();
303303
})

0 commit comments

Comments
 (0)