[IA][RISCV] Add support for vp.load/vp.store with shufflevector #135445
Changes from 4 commits
llvm/include/llvm/CodeGen/TargetLowering.h:

@@ -3179,15 +3179,15 @@ class TargetLoweringBase {
     return false;
   }

-  /// Lower an interleaved load to target specific intrinsics. Return
+  /// Lower a deinterleaved load to target specific intrinsics. Return
   /// true on success.
   ///
   /// \p Load is a vp.load instruction.
   /// \p Mask is a mask value
   /// \p DeinterleaveRes is a list of deinterleaved results.
   virtual bool
-  lowerDeinterleavedIntrinsicToVPLoad(VPIntrinsic *Load, Value *Mask,
-                                      ArrayRef<Value *> DeinterleaveRes) const {
+  lowerDeinterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
+                           ArrayRef<Value *> DeinterleaveRes) const {
     return false;
   }

@@ -3197,9 +3197,8 @@ class TargetLoweringBase {
   /// \p Store is the vp.store instruction.
   /// \p Mask is a mask value
   /// \p InterleaveOps is a list of values being interleaved.
-  virtual bool
-  lowerInterleavedIntrinsicToVPStore(VPIntrinsic *Store, Value *Mask,
-                                     ArrayRef<Value *> InterleaveOps) const {
+  virtual bool lowerInterleavedVPStore(VPIntrinsic *Store, Value *Mask,
+                                       ArrayRef<Value *> InterleaveOps) const {
     return false;
   }
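For orientation, here is a minimal sketch of what a backend override of the renamed load hook could look like. MyTargetLowering and the factor bounds are hypothetical illustrations, not the RISC-V implementation added elsewhere in this PR; only the hook signature is taken from the diff above.

    // Hypothetical backend override (sketch only, not the PR's RISC-V code).
    class MyTargetLowering : public TargetLowering {
    public:
      bool lowerDeinterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
                                    ArrayRef<Value *> DeinterleaveRes) const override {
        // The factor is implied by the number of deinterleaved leaf values.
        unsigned Factor = DeinterleaveRes.size();
        if (Factor < 2 || Factor > 8) // assumed target limit
          return false;               // bail out; the original vp.load is kept
        // ... emit a target-specific segmented load and replace the leaf values ...
        return true;
      }
    };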
llvm/lib/CodeGen/InterleavedAccessPass.cpp:

@@ -100,11 +100,11 @@ class InterleavedAccessImpl {
   unsigned MaxFactor = 0u;

   /// Transform an interleaved load into target specific intrinsics.
-  bool lowerInterleavedLoad(LoadInst *LI,
+  bool lowerInterleavedLoad(Instruction *LoadOp,
                             SmallSetVector<Instruction *, 32> &DeadInsts);

   /// Transform an interleaved store into target specific intrinsics.
-  bool lowerInterleavedStore(StoreInst *SI,
+  bool lowerInterleavedStore(Instruction *StoreOp,
                              SmallSetVector<Instruction *, 32> &DeadInsts);

   /// Transform a load and a deinterleave intrinsic into target specific

@@ -131,7 +131,7 @@ class InterleavedAccessImpl {
   /// made.
   bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles,
                             SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
-                            LoadInst *LI);
+                            Instruction *LI);
 };

 class InterleavedAccess : public FunctionPass {

@@ -249,11 +249,43 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
   return false;
 }

+/// Return true if it's an interleaving mask. For instance, 111000111000 is
+/// interleaved from three 1010 masks. \p SubMask returns the mask of individual
+/// lane.
+static bool isInterleavedConstantMask(unsigned Factor, ConstantVector *Mask,
+                                      SmallVectorImpl<Constant *> &LaneMask) {
+  unsigned LaneMaskLen = LaneMask.size();
+  if (auto *Splat = Mask->getSplatValue()) {
+    std::fill(LaneMask.begin(), LaneMask.end(), Splat);
+  } else {
+    for (unsigned Idx = 0U, N = LaneMaskLen * Factor; Idx < N; ++Idx) {
+      Constant *Ref = Mask->getAggregateElement(alignDown(Idx, Factor));
+      if (Ref != Mask->getAggregateElement(Idx))
+        return false;
+      LaneMask[Idx / Factor] = Ref;
+    }
+  }
+
+  return true;
+}
+
 bool InterleavedAccessImpl::lowerInterleavedLoad(
-    LoadInst *LI, SmallSetVector<Instruction *, 32> &DeadInsts) {
-  if (!LI->isSimple() || isa<ScalableVectorType>(LI->getType()))
+    Instruction *LoadOp, SmallSetVector<Instruction *, 32> &DeadInsts) {
+  if (isa<ScalableVectorType>(LoadOp->getType()))
     return false;

+  if (auto *LI = dyn_cast<LoadInst>(LoadOp)) {
+    if (!LI->isSimple())
+      return false;
+  } else if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadOp)) {
+    assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load);
+    // Require a constant mask and evl.
+    if (!isa<ConstantVector>(VPLoad->getArgOperand(1)))
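The isInterleavedConstantMask helper added above checks that every group of Factor consecutive mask elements is identical and collapses each group to one lane-mask element. Below is a self-contained sketch of the same check using plain bools instead of LLVM Constant values; the names and the simplified splat-free logic are illustrative assumptions, not the pass's code.

    #include <cstdio>
    #include <optional>
    #include <vector>

    // Deinterleave a constant-style mask: every group of Factor consecutive
    // elements must be identical; the per-group value becomes the lane mask.
    static std::optional<std::vector<bool>>
    deinterleaveMask(unsigned Factor, const std::vector<bool> &Mask) {
      std::vector<bool> LaneMask(Mask.size() / Factor);
      for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) {
        bool Ref = Mask[(Idx / Factor) * Factor]; // alignDown(Idx, Factor)
        if (Mask[Idx] != Ref)
          return std::nullopt; // not an interleaving of a single lane mask
        LaneMask[Idx / Factor] = Ref;
      }
      return LaneMask;
    }

    int main() {
      // 111000111000 with Factor = 3 deinterleaves to the lane mask 1010.
      std::vector<bool> Mask = {1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0};
      if (auto Lane = deinterleaveMask(3, Mask)) {
        for (bool B : *Lane)
          std::printf("%d", B ? 1 : 0); // prints 1010
        std::printf("\n");
      }
      return 0;
    }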
Review comment — suggested change:
-  if (!isa<ConstantVector>(VPLoad->getArgOperand(1)))
+  if (!isa<ConstantVector>(VPLoad->getMaskParam()))

Reply: Fixed.
Review comment: Is this an issue that only affects vp loads? Does the non-vp lowerInterleavedLoad need to worry about this too?

Reply: Yes. TLI::lowerDeinterleavedVPLoad uses the number of shuffle / leaf values as the factor, which is why we have to pad ShuffleValues with null until it has a size of Factor. TLI::lowerInterleavedLoad, by contrast, has Factor passed in explicitly, and it also has the Indices argument to specify the order of the shufflevector instructions.

Reply: Ah I see, thanks for explaining it. Would it be simpler to add a Factor argument to lowerDeinterleavedVPLoad to bring it in line with lowerInterleavedLoad? Non-blocking, just a suggestion.
EDIT: Actually I think this is probably best left to a separate PR where we clean up the TTI hooks, like you originally suggested.
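To make the asymmetry discussed above concrete, here is a hedged sketch of the caller side; variable names such as Shuffles, Indices, LaneMask, and VPLoad are illustrative, not the PR's exact code.

    // lowerDeinterleavedVPLoad infers the factor from the number of leaf
    // values, so lanes with no corresponding shuffle are padded with nullptr
    // to keep ShuffleValues.size() equal to Factor.
    SmallVector<Value *, 8> ShuffleValues(Factor, nullptr);
    for (unsigned I = 0; I < Shuffles.size(); ++I)
      ShuffleValues[Indices[I]] = Shuffles[I];
    if (!TLI->lowerDeinterleavedVPLoad(VPLoad, LaneMask, ShuffleValues))
      return false;
    // lowerInterleavedLoad, by contrast, receives Factor and Indices
    // explicitly, so no such padding is needed on that path.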
Review comment — suggested change:
-  if (!isa<ConstantVector>(VPStore->getArgOperand(2)))
+  if (!isa<ConstantVector>(VPStore->getMaskParam()))

Reply: Fixed.
Review comment: Not for this PR, but just noting that the VP and non-VP paths in this function don't really share much code at all. It would probably be justified to split these out into two separate functions if you want; lowerInterleavedLoad might need more refactoring, though.
Review comment: empty block?

Reply: This is likely caused by the git merge (which makes me worry there are other places like this). I'll remove it in the next update.

Reply: It's fixed now.
Review comment: expected -> expects?

Reply: Fixed.
Review comment: Just noting it would be nice not to have to create the shuffles in case TLI->lowerInterleavedVPStore bails and we end up leaving them around. I don't have any good suggestions for now, though.

Reply: Yeah, I believe a similar thing happens in InterleavedAccessImpl::lowerInterleavedLoad: we might create new shufflevectors when canonicalizing the extractions, even if we bail out later in TLI::lowerInterleavedLoad.
Review comment: I think this should still be "interleaved load" to match the terminology used in the header description at the top of InterleavedAccessPass.cpp and the loop vectorizer.

Reply: Agree, it's fixed now.