Skip to content

Commit 16d5c02

Browse files
committed
Support EVL interleave access
1 parent 02ab6f3 commit 16d5c02

File tree

11 files changed

+401
-290
lines changed

11 files changed

+401
-290
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4173,6 +4173,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
41734173
case VPDef::VPWidenIntOrFpInductionSC:
41744174
case VPDef::VPWidenPointerInductionSC:
41754175
case VPDef::VPReductionPHISC:
4176+
case VPDef::VPInterleaveEVLSC:
41764177
case VPDef::VPInterleaveSC:
41774178
case VPDef::VPWidenLoadEVLSC:
41784179
case VPDef::VPWidenLoadSC:
@@ -4201,8 +4202,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
42014202

42024203
// If the recipe defines no value and is not a store, e.g., branches, continue - no value to check.
42034204
if (R.getNumDefinedValues() == 0 &&
4204-
!isa<VPWidenStoreRecipe, VPWidenStoreEVLRecipe, VPInterleaveRecipe>(
4205-
&R))
4205+
!isa<VPWidenStoreRecipe, VPWidenStoreEVLRecipe, VPInterleaveBase>(&R))
42064206
continue;
42074207
// For multi-def recipes, currently only interleaved loads, suffice to
42084208
// check first def only.

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 136 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
557557
case VPRecipeBase::VPPartialReductionSC:
558558
return true;
559559
case VPRecipeBase::VPBranchOnMaskSC:
560+
case VPRecipeBase::VPInterleaveEVLSC:
560561
case VPRecipeBase::VPInterleaveSC:
561562
case VPRecipeBase::VPIRInstructionSC:
562563
case VPRecipeBase::VPWidenLoadEVLSC:
@@ -2387,11 +2388,14 @@ class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe {
23872388
}
23882389
};
23892390

2390-
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2391-
/// or stores into one wide load/store and shuffles. The first operand of a
2392-
/// VPInterleave recipe is the address, followed by the stored values, followed
2393-
/// by an optional mask.
2394-
class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase {
2391+
/// A common base class for interleaved memory operations.
2392+
/// An interleaved memory operation is a memory access method that combines
2393+
/// multiple strided loads/stores into a single wide load/store with shuffles.
2394+
/// The first operand must be the address. The optional operands are, in order,
2395+
/// the stored values and the mask.
2396+
/// TODO: Inherit from VPIRMetadata
2397+
class LLVM_ABI_FOR_TEST VPInterleaveBase : public VPRecipeBase {
2398+
protected:
23952399
const InterleaveGroup<Instruction> *IG;
23962400

23972401
/// Indicates if the interleave group is in a conditional block and requires a
@@ -2402,14 +2406,13 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase {
24022406
/// unused gaps can be loaded speculatively.
24032407
bool NeedsMaskForGaps = false;
24042408

2405-
public:
2406-
VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
2407-
ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2408-
bool NeedsMaskForGaps, DebugLoc DL)
2409-
: VPRecipeBase(VPDef::VPInterleaveSC, {Addr},
2410-
DL),
2411-
2412-
IG(IG), NeedsMaskForGaps(NeedsMaskForGaps) {
2409+
VPInterleaveBase(const unsigned char SC,
2410+
const InterleaveGroup<Instruction> *IG,
2411+
ArrayRef<VPValue *> Operands,
2412+
ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2413+
bool NeedsMaskForGaps, DebugLoc DL)
2414+
: VPRecipeBase(SC, Operands, DL), IG(IG),
2415+
NeedsMaskForGaps(NeedsMaskForGaps) {
24132416
// TODO: extend the masked interleaved-group support to reversed access.
24142417
assert((!Mask || !IG->isReverse()) &&
24152418
"Reversed masked interleave-group not supported.");
@@ -2427,65 +2430,163 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase {
24272430
addOperand(Mask);
24282431
}
24292432
}
2430-
~VPInterleaveRecipe() override = default;
24312433

2432-
VPInterleaveRecipe *clone() override {
2433-
return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2434-
NeedsMaskForGaps, getDebugLoc());
2434+
public:
2435+
VPInterleaveBase *clone() override {
2436+
llvm_unreachable("cloning not supported");
24352437
}
24362438

2437-
VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2439+
static inline bool classof(const VPRecipeBase *R) {
2440+
return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
2441+
R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
2442+
}
2443+
2444+
static inline bool classof(const VPUser *U) {
2445+
auto *R = dyn_cast<VPRecipeBase>(U);
2446+
return R && classof(R);
2447+
}
24382448

24392449
/// Return the address accessed by this recipe.
24402450
VPValue *getAddr() const {
24412451
return getOperand(0); // Address is the 1st, mandatory operand.
24422452
}
24432453

2454+
/// Return true if the access needs a mask because of the gaps.
2455+
bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2456+
24442457
/// Return the mask used by this recipe. Note that a full mask is represented
24452458
/// by a nullptr.
24462459
VPValue *getMask() const {
2447-
// Mask is optional and therefore the last, currently 2nd operand.
2460+
// Mask is optional and the last operand.
24482461
return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
24492462
}
24502463

2464+
const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
2465+
2466+
Instruction *getInsertPos() const { return IG->getInsertPos(); }
2467+
2468+
void execute(VPTransformState &State) override {
2469+
llvm_unreachable("VPInterleaveBase should not be instantiated.");
2470+
}
2471+
2472+
/// Return the cost of this interleaved memory access recipe.
2473+
InstructionCost computeCost(ElementCount VF,
2474+
VPCostContext &Ctx) const override;
2475+
2476+
/// Returns true if the recipe only uses the first lane of operand \p Op.
2477+
virtual bool onlyFirstLaneUsed(const VPValue *Op) const = 0;
2478+
2479+
/// Returns the number of stored operands of this interleave group. Returns 0
2480+
/// for load interleave groups.
2481+
virtual unsigned getNumStoreOperands() const = 0;
2482+
24512483
/// Return the VPValues stored by this interleave group. If it is a load
24522484
/// interleave group, return an empty ArrayRef.
2453-
ArrayRef<VPValue *> getStoredValues() const {
2454-
// The first operand is the address, followed by the stored values, followed
2455-
// by an optional mask.
2456-
return ArrayRef<VPValue *>(op_begin(), getNumOperands())
2457-
.slice(1, getNumStoreOperands());
2485+
virtual ArrayRef<VPValue *> getStoredValues() const = 0;
2486+
};
2487+
2488+
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2489+
/// or stores into one wide load/store and shuffles. The first operand of a
2490+
/// VPInterleave recipe is the address, followed by the stored values, followed
2491+
/// by an optional mask.
2492+
class LLVM_ABI_FOR_TEST VPInterleaveRecipe final : public VPInterleaveBase {
2493+
public:
2494+
VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
2495+
ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2496+
bool NeedsMaskForGaps, DebugLoc DL)
2497+
: VPInterleaveBase(VPDef::VPInterleaveSC, IG, ArrayRef<VPValue *>({Addr}),
2498+
StoredValues, Mask, NeedsMaskForGaps, DL) {}
2499+
2500+
~VPInterleaveRecipe() override = default;
2501+
2502+
VPInterleaveRecipe *clone() override {
2503+
return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2504+
NeedsMaskForGaps, getDebugLoc());
24582505
}
24592506

2507+
VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2508+
24602509
/// Generate the wide load or store, and shuffles.
24612510
void execute(VPTransformState &State) override;
24622511

2463-
/// Return the cost of this VPInterleaveRecipe.
2464-
InstructionCost computeCost(ElementCount VF,
2465-
VPCostContext &Ctx) const override;
2466-
24672512
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
24682513
/// Print the recipe.
24692514
void print(raw_ostream &O, const Twine &Indent,
24702515
VPSlotTracker &SlotTracker) const override;
24712516
#endif
24722517

2473-
const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
2518+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2519+
assert(is_contained(operands(), Op) &&
2520+
"Op must be an operand of the recipe");
2521+
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2522+
}
24742523

2475-
/// Returns the number of stored operands of this interleave group. Returns 0
2476-
/// for load interleave groups.
2477-
unsigned getNumStoreOperands() const {
2524+
unsigned getNumStoreOperands() const override {
24782525
return getNumOperands() - (HasMask ? 2 : 1);
24792526
}
24802527

2481-
/// The recipe only uses the first lane of the address.
2528+
ArrayRef<VPValue *> getStoredValues() const override {
2529+
// The first operand is the address, followed by the stored values, followed
2530+
// by an optional mask.
2531+
return ArrayRef<VPValue *>(op_begin(), getNumOperands())
2532+
.slice(1, getNumStoreOperands());
2533+
}
2534+
};
2535+
2536+
/// A recipe for interleaved access operations with vector-predication
2537+
/// intrinsics. The first operand is the address, the second operand is the
2538+
/// explicit vector length. Stored values and mask are optional operands.
2539+
class LLVM_ABI_FOR_TEST VPInterleaveEVLRecipe final : public VPInterleaveBase {
2540+
public:
2541+
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
2542+
: VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2543+
ArrayRef<VPValue *>({R.getAddr(), &EVL}),
2544+
R.getStoredValues(), Mask, R.needsMaskForGaps(),
2545+
R.getDebugLoc()) {
2546+
assert(!IG->isReverse() &&
2547+
"Reversed interleave-group with tail folding is not supported.");
2548+
assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2549+
"supported for scalable vector.");
2550+
}
2551+
2552+
~VPInterleaveEVLRecipe() override = default;
2553+
2554+
VPInterleaveEVLRecipe *clone() override {
2555+
llvm_unreachable("cloning not implemented yet");
2556+
}
2557+
2558+
VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
2559+
2560+
/// The VPValue of the explicit vector length.
2561+
VPValue *getEVL() const { return getOperand(1); }
2562+
2563+
/// Generate the wide load or store, and shuffles.
2564+
void execute(VPTransformState &State) override;
2565+
2566+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2567+
/// Print the recipe.
2568+
void print(raw_ostream &O, const Twine &Indent,
2569+
VPSlotTracker &SlotTracker) const override;
2570+
#endif
2571+
2572+
/// The recipe only uses the first lane of the address and of the EVL operand.
24822573
bool onlyFirstLaneUsed(const VPValue *Op) const override {
24832574
assert(is_contained(operands(), Op) &&
24842575
"Op must be an operand of the recipe");
2485-
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2576+
return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
2577+
Op == getEVL();
24862578
}
24872579

2488-
Instruction *getInsertPos() const { return IG->getInsertPos(); }
2580+
unsigned getNumStoreOperands() const override {
2581+
return getNumOperands() - (HasMask ? 3 : 2);
2582+
}
2583+
2584+
ArrayRef<VPValue *> getStoredValues() const override {
2585+
// The first operand is the address, and the second operand is EVL, followed
2586+
// by the stored values, followed by an optional mask.
2587+
return ArrayRef<VPValue *>(op_begin(), getNumOperands())
2588+
.slice(2, getNumStoreOperands());
2589+
}
24892590
};
24902591

24912592
/// A recipe to represent inloop reduction operations, performing a reduction on

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
296296
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
297297
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
298298
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
299-
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
299+
.Case<VPInterleaveRecipe, VPInterleaveEVLRecipe>([V](const auto *R) {
300300
// TODO: Use info from interleave group.
301301
return V->getUnderlyingValue()->getType();
302302
})

0 commit comments

Comments
 (0)