-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[LV] Convert gather loads with invariant stride into strided loads #147297
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
a2ca7fd
4a78f18
2a46ad6
5d99c2e
736210d
a3f18ae
2ad2cdc
2064369
eb70f43
12d526a
b8930a9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -559,6 +559,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { | |
| case VPRecipeBase::VPInterleaveEVLSC: | ||
| case VPRecipeBase::VPInterleaveSC: | ||
| case VPRecipeBase::VPIRInstructionSC: | ||
| case VPRecipeBase::VPWidenStridedLoadSC: | ||
| case VPRecipeBase::VPWidenLoadEVLSC: | ||
| case VPRecipeBase::VPWidenLoadSC: | ||
| case VPRecipeBase::VPWidenStoreEVLSC: | ||
|
|
@@ -1773,10 +1774,6 @@ struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags, | |
| class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags { | ||
| Type *SourceElementTy; | ||
|
|
||
| bool isPointerLoopInvariant() const { | ||
| return getOperand(0)->isDefinedOutsideLoopRegions(); | ||
| } | ||
|
|
||
| bool isIndexLoopInvariant(unsigned I) const { | ||
| return getOperand(I + 1)->isDefinedOutsideLoopRegions(); | ||
| } | ||
|
|
@@ -1809,6 +1806,30 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags { | |
| /// This recipe generates a GEP instruction. | ||
| unsigned getOpcode() const { return Instruction::GetElementPtr; } | ||
|
|
||
| bool isPointerLoopInvariant() const { | ||
| return getOperand(0)->isDefinedOutsideLoopRegions(); | ||
| } | ||
|
Comment on lines
+1809
to
+1811
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. unrelated move? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Change it to public because There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh I see, thanks |
||
|
|
||
| std::optional<unsigned> getUniqueVariantIndex() const { | ||
| std::optional<unsigned> VarIdx; | ||
| for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) { | ||
| if (isIndexLoopInvariant(I)) | ||
| continue; | ||
|
|
||
| if (VarIdx) | ||
| return std::nullopt; | ||
| VarIdx = I; | ||
| } | ||
| return VarIdx; | ||
| } | ||
|
|
||
| /// Returns the element type for the first \p I indices of this recipe. | ||
| Type *getIndexedType(unsigned I) const { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. would be good to document There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr()); | ||
| SmallVector<Value *, 4> Ops(GEP->idx_begin(), GEP->idx_begin() + I); | ||
| return GetElementPtrInst::getIndexedType(SourceElementTy, Ops); | ||
| } | ||
|
|
||
| /// Generate the gep nodes. | ||
| void execute(VPTransformState &State) override; | ||
|
|
||
|
|
@@ -1899,20 +1920,23 @@ class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags, | |
| #endif | ||
| }; | ||
|
|
||
| /// A recipe to compute the pointers for widened memory accesses of IndexTy. | ||
| /// A recipe to compute the pointers for widened memory accesses of | ||
| /// SourceElementTy, with the Stride expressed in units of SourceElementTy. | ||
| class VPVectorPointerRecipe : public VPRecipeWithIRFlags, | ||
| public VPUnrollPartAccessor<1> { | ||
| public VPUnrollPartAccessor<2> { | ||
| Type *SourceElementTy; | ||
|
|
||
| public: | ||
| VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, | ||
| VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride, | ||
| GEPNoWrapFlags GEPFlags, DebugLoc DL) | ||
| : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr), | ||
| GEPFlags, DL), | ||
| : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, | ||
| ArrayRef<VPValue *>({Ptr, Stride}), GEPFlags, DL), | ||
| SourceElementTy(SourceElementTy) {} | ||
|
|
||
| VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC) | ||
|
|
||
| VPValue *getStride() const { return getOperand(1); } | ||
|
|
||
| void execute(VPTransformState &State) override; | ||
|
|
||
| Type *getSourceElementType() const { return SourceElementTy; } | ||
|
|
@@ -1933,7 +1957,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags, | |
|
|
||
| VPVectorPointerRecipe *clone() override { | ||
| return new VPVectorPointerRecipe(getOperand(0), SourceElementTy, | ||
| getGEPNoWrapFlags(), getDebugLoc()); | ||
| getStride(), getGEPNoWrapFlags(), | ||
| getDebugLoc()); | ||
| } | ||
|
|
||
| /// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that | ||
|
|
@@ -3205,7 +3230,8 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase, | |
| return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC || | ||
| R->getVPDefID() == VPRecipeBase::VPWidenStoreSC || | ||
| R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC || | ||
| R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC; | ||
| R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC || | ||
| R->getVPDefID() == VPRecipeBase::VPWidenStridedLoadSC; | ||
| } | ||
|
|
||
| static inline bool classof(const VPUser *U) { | ||
|
|
@@ -3326,6 +3352,57 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue { | |
| } | ||
| }; | ||
|
|
||
| /// A recipe for strided load operations, using the base address, stride, VF, | ||
| /// and an optional mask. This recipe will generate a vp.strided.load intrinsic | ||
| /// call to represent memory accesses with a fixed stride. | ||
| struct VPWidenStridedLoadRecipe final : public VPWidenMemoryRecipe, | ||
| public VPValue { | ||
| VPWidenStridedLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Stride, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we store the required info independent of the LLVM IR instruction? It would also help to clarify what information is actually used and why this cannot be simply a VPWidenIntrinsicsRecipe, as it would map 1-1 to an intrinsic There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Maybe, if we also store the type information in the recipe. But I want to know, in what scenario would we create a VPWidenStridedLoadRecipe without a LoadInst?
I tried to do that: Mel-Chen@d19fe61 Next, it might become difficult to ensure profitability before generating the strided access (i.e., we may not be able to achieve the change suggested in this comment Finally, using VPWidenIntrinsicRecipe reduces readability during transformations. Currently, VPWidenStridedLoadRecipe provides members like getAddr() and getStride(), which improves readability. This issue is not limited to EVL lowering—it should also have an impact #164205. That’s my take on this. What’s your opinion? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Hm yes, this is a general modeling issue, but could be improved separately (and does not only apply to this intrinsic I think). Is setting the alignment critical for the initial implementation?
I am not sure I understand this completely; We should be able to check the profitability up-front regardless of what recipe we create later on, I think?
I think this could be mostly taken care of by providing a |
||
| VPValue *VF, VPValue *Mask, | ||
| const VPIRMetadata &Metadata, DebugLoc DL) | ||
| : VPWidenMemoryRecipe( | ||
| VPDef::VPWidenStridedLoadSC, Load, {Addr, Stride, VF}, | ||
| /*Consecutive=*/false, /*Reverse=*/false, Metadata, DL), | ||
| VPValue(this, &Load) { | ||
| setMask(Mask); | ||
| } | ||
|
|
||
| VPWidenStridedLoadRecipe *clone() override { | ||
| return new VPWidenStridedLoadRecipe(cast<LoadInst>(Ingredient), getAddr(), | ||
| getStride(), getVF(), getMask(), *this, | ||
| getDebugLoc()); | ||
| } | ||
|
|
||
| VP_CLASSOF_IMPL(VPDef::VPWidenStridedLoadSC); | ||
|
|
||
| /// Return the stride operand. | ||
| VPValue *getStride() const { return getOperand(1); } | ||
|
|
||
| /// Return the VF operand. | ||
| VPValue *getVF() const { return getOperand(2); } | ||
|
|
||
| /// Generate a strided load. | ||
| void execute(VPTransformState &State) override; | ||
|
|
||
| /// Return the cost of this VPWidenStridedLoadRecipe. | ||
| InstructionCost computeCost(ElementCount VF, | ||
| VPCostContext &Ctx) const override; | ||
|
|
||
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
| /// Print the recipe. | ||
| void print(raw_ostream &O, const Twine &Indent, | ||
| VPSlotTracker &SlotTracker) const override; | ||
| #endif | ||
|
|
||
| /// Returns true if the recipe only uses the first lane of operand \p Op. | ||
| bool onlyFirstLaneUsed(const VPValue *Op) const override { | ||
| assert(is_contained(operands(), Op) && | ||
| "Op must be an operand of the recipe"); | ||
| // All operands except the mask are only used for the first lane. | ||
| return Op == getAddr() || Op == getStride() || Op == getVF(); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might be good to document that this includes everything except the mask There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| } | ||
| }; | ||
|
|
||
| /// A recipe for widening store operations, using the stored value, the address | ||
| /// to store to and an optional mask. | ||
| struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.