-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[LV][EVL] Support interleaved access with tail folding by EVL #152070
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ef468d5
4d31cc6
933cebb
93564d0
1456b0e
c2cffb1
4c91e3a
b64ecf6
209474e
126a1cc
ca5ec88
389b9e5
a65ea7d
a4c58f0
c569b12
306a835
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -557,6 +557,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { | |
| case VPRecipeBase::VPPartialReductionSC: | ||
| return true; | ||
| case VPRecipeBase::VPBranchOnMaskSC: | ||
| case VPRecipeBase::VPInterleaveEVLSC: | ||
| case VPRecipeBase::VPInterleaveSC: | ||
| case VPRecipeBase::VPIRInstructionSC: | ||
| case VPRecipeBase::VPWidenLoadEVLSC: | ||
|
|
@@ -2435,12 +2436,13 @@ class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe { | |
| } | ||
| }; | ||
|
|
||
| /// VPInterleaveRecipe is a recipe for transforming an interleave group of load | ||
| /// or stores into one wide load/store and shuffles. The first operand of a | ||
| /// VPInterleave recipe is the address, followed by the stored values, followed | ||
| /// by an optional mask. | ||
| class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase, | ||
| public VPIRMetadata { | ||
| /// A common base class for interleaved memory operations. | ||
| /// An interleaved memory operation is a memory access method that combines | ||
| /// multiple strided loads/stores into a single wide load/store with shuffles. | ||
| /// The first operand is the start address. The optional operands are, in order, | ||
| /// the stored values and the mask. | ||
| class LLVM_ABI_FOR_TEST VPInterleaveBase : public VPRecipeBase, | ||
| public VPIRMetadata { | ||
| const InterleaveGroup<Instruction> *IG; | ||
|
|
||
|
||
| /// Indicates if the interleave group is in a conditional block and requires a | ||
|
|
@@ -2451,12 +2453,14 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase, | |
| /// unused gaps can be loaded speculatively. | ||
| bool NeedsMaskForGaps = false; | ||
|
|
||
| public: | ||
| VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr, | ||
| ArrayRef<VPValue *> StoredValues, VPValue *Mask, | ||
| bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL) | ||
| : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}, DL), VPIRMetadata(MD), | ||
| IG(IG), NeedsMaskForGaps(NeedsMaskForGaps) { | ||
| protected: | ||
| VPInterleaveBase(const unsigned char SC, | ||
| const InterleaveGroup<Instruction> *IG, | ||
| ArrayRef<VPValue *> Operands, | ||
| ArrayRef<VPValue *> StoredValues, VPValue *Mask, | ||
| bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL) | ||
| : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG), | ||
| NeedsMaskForGaps(NeedsMaskForGaps) { | ||
| // TODO: extend the masked interleaved-group support to reversed access. | ||
| assert((!Mask || !IG->isReverse()) && | ||
| "Reversed masked interleave-group not supported."); | ||
|
|
@@ -2474,14 +2478,19 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase, | |
| addOperand(Mask); | ||
| } | ||
| } | ||
| ~VPInterleaveRecipe() override = default; | ||
|
|
||
| VPInterleaveRecipe *clone() override { | ||
| return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(), | ||
| NeedsMaskForGaps, *this, getDebugLoc()); | ||
| public: | ||
| VPInterleaveBase *clone() override = 0; | ||
|
|
||
| static inline bool classof(const VPRecipeBase *R) { | ||
| return R->getVPDefID() == VPRecipeBase::VPInterleaveSC || | ||
| R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC; | ||
| } | ||
|
|
||
| VP_CLASSOF_IMPL(VPDef::VPInterleaveSC) | ||
| static inline bool classof(const VPUser *U) { | ||
| auto *R = dyn_cast<VPRecipeBase>(U); | ||
| return R && classof(R); | ||
| } | ||
|
|
||
| /// Return the address accessed by this recipe. | ||
| VPValue *getAddr() const { | ||
|
|
@@ -2491,48 +2500,130 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase, | |
| /// Return the mask used by this recipe. Note that a full mask is represented | ||
| /// by a nullptr. | ||
| VPValue *getMask() const { | ||
| // Mask is optional and therefore the last, currently 2nd operand. | ||
| // Mask is optional and the last operand. | ||
| return HasMask ? getOperand(getNumOperands() - 1) : nullptr; | ||
| } | ||
|
|
||
| /// Return true if the access needs a mask because of the gaps. | ||
| bool needsMaskForGaps() const { return NeedsMaskForGaps; } | ||
|
|
||
| const InterleaveGroup<Instruction> *getInterleaveGroup() const { return IG; } | ||
|
|
||
| Instruction *getInsertPos() const { return IG->getInsertPos(); } | ||
|
|
||
| void execute(VPTransformState &State) override { | ||
| llvm_unreachable("VPInterleaveBase should not be instantiated."); | ||
| } | ||
|
|
||
| /// Return the cost of this recipe. | ||
| InstructionCost computeCost(ElementCount VF, | ||
| VPCostContext &Ctx) const override; | ||
|
|
||
| /// Returns true if the recipe only uses the first lane of operand \p Op. | ||
| virtual bool onlyFirstLaneUsed(const VPValue *Op) const override = 0; | ||
|
|
||
| /// Returns the number of stored operands of this interleave group. Returns 0 | ||
| /// for load interleave groups. | ||
| virtual unsigned getNumStoreOperands() const = 0; | ||
|
|
||
| /// Return the VPValues stored by this interleave group. If it is a load | ||
| /// interleave group, return an empty ArrayRef. | ||
| ArrayRef<VPValue *> getStoredValues() const { | ||
| // The first operand is the address, followed by the stored values, followed | ||
| // by an optional mask. | ||
| return ArrayRef<VPValue *>(op_begin(), getNumOperands()) | ||
| .slice(1, getNumStoreOperands()); | ||
| return ArrayRef<VPValue *>(op_end() - | ||
| (getNumStoreOperands() + (HasMask ? 1 : 0)), | ||
| getNumStoreOperands()); | ||
| } | ||
| }; | ||
|
|
||
| /// VPInterleaveRecipe is a recipe for transforming an interleave group of load | ||
| /// or stores into one wide load/store and shuffles. The first operand of a | ||
| /// VPInterleave recipe is the address, followed by the stored values, followed | ||
| /// by an optional mask. | ||
| class LLVM_ABI_FOR_TEST VPInterleaveRecipe final : public VPInterleaveBase { | ||
| public: | ||
| VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr, | ||
| ArrayRef<VPValue *> StoredValues, VPValue *Mask, | ||
| bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL) | ||
| : VPInterleaveBase(VPDef::VPInterleaveSC, IG, Addr, StoredValues, Mask, | ||
| NeedsMaskForGaps, MD, DL) {} | ||
|
|
||
| ~VPInterleaveRecipe() override = default; | ||
|
|
||
| VPInterleaveRecipe *clone() override { | ||
| return new VPInterleaveRecipe(getInterleaveGroup(), getAddr(), | ||
| getStoredValues(), getMask(), | ||
| needsMaskForGaps(), *this, getDebugLoc()); | ||
| } | ||
|
|
||
| VP_CLASSOF_IMPL(VPDef::VPInterleaveSC) | ||
|
|
||
| /// Generate the wide load or store, and shuffles. | ||
| void execute(VPTransformState &State) override; | ||
|
|
||
| /// Return the cost of this VPInterleaveRecipe. | ||
| InstructionCost computeCost(ElementCount VF, | ||
| VPCostContext &Ctx) const override; | ||
|
|
||
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
| /// Print the recipe. | ||
| void print(raw_ostream &O, const Twine &Indent, | ||
| VPSlotTracker &SlotTracker) const override; | ||
| #endif | ||
|
|
||
| const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; } | ||
| bool onlyFirstLaneUsed(const VPValue *Op) const override { | ||
| assert(is_contained(operands(), Op) && | ||
| "Op must be an operand of the recipe"); | ||
| return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op); | ||
| } | ||
|
|
||
| /// Returns the number of stored operands of this interleave group. Returns 0 | ||
| /// for load interleave groups. | ||
| unsigned getNumStoreOperands() const { | ||
| return getNumOperands() - (HasMask ? 2 : 1); | ||
| unsigned getNumStoreOperands() const override { | ||
| return getNumOperands() - (getMask() ? 2 : 1); | ||
| } | ||
| }; | ||
|
|
||
| /// A recipe for interleaved memory operations with vector-predication | ||
| /// intrinsics. The first operand is the address, the second operand is the | ||
| /// explicit vector length. Stored values and mask are optional operands. | ||
| class LLVM_ABI_FOR_TEST VPInterleaveEVLRecipe final : public VPInterleaveBase { | ||
| public: | ||
| VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask) | ||
| : VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(), | ||
| ArrayRef<VPValue *>({R.getAddr(), &EVL}), | ||
| R.getStoredValues(), Mask, R.needsMaskForGaps(), R, | ||
| R.getDebugLoc()) { | ||
| assert(!getInterleaveGroup()->isReverse() && | ||
| "Reversed interleave-group with tail folding is not supported."); | ||
| assert(!needsMaskForGaps() && "Interleaved access with gap mask is not " | ||
| "supported for scalable vector."); | ||
| } | ||
|
|
||
| ~VPInterleaveEVLRecipe() override = default; | ||
|
|
||
| VPInterleaveEVLRecipe *clone() override { | ||
| llvm_unreachable("cloning not implemented yet"); | ||
| } | ||
|
|
||
| /// The recipe only uses the first lane of the address. | ||
| VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC) | ||
|
|
||
| /// The VPValue of the explicit vector length. | ||
| VPValue *getEVL() const { return getOperand(1); } | ||
|
|
||
| /// Generate the wide load or store, and shuffles. | ||
| void execute(VPTransformState &State) override; | ||
|
|
||
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
| /// Print the recipe. | ||
| void print(raw_ostream &O, const Twine &Indent, | ||
| VPSlotTracker &SlotTracker) const override; | ||
| #endif | ||
|
|
||
| /// The recipe only uses the first lane of the address and the EVL operand. | ||
| bool onlyFirstLaneUsed(const VPValue *Op) const override { | ||
| assert(is_contained(operands(), Op) && | ||
| "Op must be an operand of the recipe"); | ||
| return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op); | ||
| return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) || | ||
| Op == getEVL(); | ||
| } | ||
|
|
||
| Instruction *getInsertPos() const { return IG->getInsertPos(); } | ||
| unsigned getNumStoreOperands() const override { | ||
| return getNumOperands() - (getMask() ? 3 : 2); | ||
| } | ||
| }; | ||
|
|
||
| /// A recipe to represent inloop reduction operations, performing a reduction on | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The suggested code looks identical to the original one, or am I missing something?