llvm/include/llvm/CodeGen/TargetLowering.h (9 changes: 5 additions & 4 deletions)

@@ -3157,11 +3157,11 @@ class TargetLoweringBase {
  /// Lower an interleaved load to target specific intrinsics. Return
  /// true on success.
  ///
- /// \p LI is the vector load instruction.
+ /// \p LoadOp is a vector load or vp.load instruction.
  /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector.
  /// \p Indices is the corresponding indices for each shufflevector.
  /// \p Factor is the interleave factor.
- virtual bool lowerInterleavedLoad(LoadInst *LI,
+ virtual bool lowerInterleavedLoad(Instruction *LoadOp,
                                    ArrayRef<ShuffleVectorInst *> Shuffles,
                                    ArrayRef<unsigned> Indices,
                                    unsigned Factor) const {
@@ -3171,10 +3171,11 @@ class TargetLoweringBase {
  /// Lower an interleaved store to target specific intrinsics. Return
  /// true on success.
  ///
- /// \p SI is the vector store instruction.
+ /// \p StoreOp is a vector store or vp.store instruction.
  /// \p SVI is the shufflevector to RE-interleave the stored vector.
  /// \p Factor is the interleave factor.
- virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
+ virtual bool lowerInterleavedStore(Instruction *StoreOp,
+                                    ShuffleVectorInst *SVI,
                                     unsigned Factor) const {
    return false;
  }
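For orientation, this is the shape of IR the hook is handed: a wide load whose users de-interleave it with shufflevectors, and now also its vp.load counterpart. A minimal factor-2 sketch (the function and value names are illustrative, not from the patch):

```llvm
declare <8 x i32> @llvm.vp.load.v8i32.p0(ptr, <8 x i1>, i32)

define void @sketch(ptr %p) {
  ; Plain form: a wide load whose only users are de-interleaving shuffles.
  %wide = load <8 x i32>, ptr %p
  %even = shufflevector <8 x i32> %wide, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %odd  = shufflevector <8 x i32> %wide, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ; New form: the same pattern rooted at a vp.load with a constant mask and EVL.
  %wide2 = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p,
               <8 x i1> <i1 true, i1 true, i1 true, i1 true,
                         i1 true, i1 true, i1 true, i1 true>, i32 8)
  %even2 = shufflevector <8 x i32> %wide2, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %odd2  = shufflevector <8 x i32> %wide2, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ret void
}
```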
llvm/lib/CodeGen/InterleavedAccessPass.cpp (177 changes: 150 additions & 27 deletions)

@@ -45,6 +45,7 @@
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
@@ -100,11 +101,11 @@ class InterleavedAccessImpl {
unsigned MaxFactor = 0u;

/// Transform an interleaved load into target specific intrinsics.
- bool lowerInterleavedLoad(LoadInst *LI,
+ bool lowerInterleavedLoad(Instruction *LoadOp,
SmallSetVector<Instruction *, 32> &DeadInsts);

/// Transform an interleaved store into target specific intrinsics.
- bool lowerInterleavedStore(StoreInst *SI,
+ bool lowerInterleavedStore(Instruction *StoreOp,
SmallSetVector<Instruction *, 32> &DeadInsts);

/// Transform a load and a deinterleave intrinsic into target specific
@@ -131,7 +132,7 @@ class InterleavedAccessImpl {
/// made.
bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles,
SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
-                           LoadInst *LI);
+                           Instruction *LI);
};

class InterleavedAccess : public FunctionPass {
@@ -250,10 +251,23 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
}

bool InterleavedAccessImpl::lowerInterleavedLoad(
-     LoadInst *LI, SmallSetVector<Instruction *, 32> &DeadInsts) {
-   if (!LI->isSimple() || isa<ScalableVectorType>(LI->getType()))
+     Instruction *LoadOp, SmallSetVector<Instruction *, 32> &DeadInsts) {
+   if (isa<ScalableVectorType>(LoadOp->getType()))
      return false;

[Review (Collaborator)]: Can this just be Load? I'm not sure the Op is providing any value.
[Reply (Author)]: Fixed.

+   if (auto *LI = dyn_cast<LoadInst>(LoadOp)) {
+     if (!LI->isSimple())
+       return false;
+   } else if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadOp)) {
+     assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load);
+     // Require a constant mask and evl.
+     if (!isa<ConstantVector>(VPLoad->getArgOperand(1)) ||
+         !isa<ConstantInt>(VPLoad->getArgOperand(2)))
+       return false;
+   } else {
+     llvm_unreachable("unsupported load operation");
+   }
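To illustrate the gate (a sketch, not from the patch): the first vp.load below passes it, the second is rejected because its mask and EVL are only known at run time.

```llvm
declare <8 x i32> @llvm.vp.load.v8i32.p0(ptr, <8 x i1>, i32)

define void @gate(ptr %p, <8 x i1> %m, i32 %evl) {
  ; Accepted: both the mask and the EVL are compile-time constants.
  %a = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p,
           <8 x i1> <i1 true, i1 true, i1 true, i1 true,
                     i1 true, i1 true, i1 false, i1 false>, i32 8)
  ; Rejected: the mask and EVL are runtime values, so the pass bails out.
  %b = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p, <8 x i1> %m, i32 %evl)
  ret void
}
```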

// Check if all users of this load are shufflevectors. If we encounter any
// users that are extractelement instructions or binary operators, we save
// them to later check if they can be modified to extract from one of the
Expand All @@ -265,7 +279,7 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
// binop are the same load.
SmallSetVector<ShuffleVectorInst *, 4> BinOpShuffles;

- for (auto *User : LI->users()) {
+ for (auto *User : LoadOp->users()) {
auto *Extract = dyn_cast<ExtractElementInst>(User);
if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
Extracts.push_back(Extract);
@@ -294,13 +308,31 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
unsigned Factor, Index;

unsigned NumLoadElements =
-     cast<FixedVectorType>(LI->getType())->getNumElements();
+     cast<FixedVectorType>(LoadOp->getType())->getNumElements();
auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0];
// Check if the first shufflevector is DE-interleave shuffle.
if (!isDeInterleaveMask(FirstSVI->getShuffleMask(), Factor, Index, MaxFactor,
NumLoadElements))
return false;

+ // If this is a vp.load, record its mask (NOT shuffle mask).
+ BitVector MaskedIndices(NumLoadElements);
+ if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadOp)) {
+   auto *Mask = cast<ConstantVector>(VPLoad->getArgOperand(1));
+   assert(cast<FixedVectorType>(Mask->getType())->getNumElements() ==
+          NumLoadElements);
+   if (auto *Splat = Mask->getSplatValue()) {
+     // All-zeros mask, bail out early.
+     if (Splat->isZeroValue())
+       return false;
+   } else {
+     for (unsigned i = 0U; i < NumLoadElements; ++i) {
+       if (Mask->getAggregateElement(i)->isZeroValue())
+         MaskedIndices.set(i);
+     }
+   }
+ }

[Review (Member), suggested change]: for (unsigned I : seq<unsigned>(NumLoadElements)) {
[Reply (Collaborator)]: Why? Is that in the coding standards?
[Reply (Member)]: No, of course not, just a recommendation; also, the variable name should start with an uppercase letter.
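Worked through on a small example (illustrative, not from the patch): with the mask below, MaskedIndices ends up as {6, 7}, and the check added further down verifies that no de-interleave shuffle reads those lanes.

```llvm
declare <8 x i32> @llvm.vp.load.v8i32.p0(ptr, <8 x i1>, i32)

define void @mask_ok(ptr %p) {
  ; Lanes 6 and 7 are disabled by the mask, so MaskedIndices = {6, 7}.
  %v = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p,
           <8 x i1> <i1 true, i1 true, i1 true, i1 true,
                     i1 true, i1 true, i1 false, i1 false>, i32 8)
  ; These factor-2 shuffles only read lanes 0-5, so they survive the check.
  %l = shufflevector <8 x i32> %v, <8 x i32> poison, <3 x i32> <i32 0, i32 2, i32 4>
  %r = shufflevector <8 x i32> %v, <8 x i32> poison, <3 x i32> <i32 1, i32 3, i32 5>
  ret void
}
```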

// Holds the corresponding index for each DE-interleave shuffle.
SmallVector<unsigned, 4> Indices;

Expand All @@ -327,9 +359,9 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(

assert(Shuffle->getShuffleMask().size() <= NumLoadElements);

-   if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI)
+   if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LoadOp)
      Indices.push_back(Index);
-   if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI)
+   if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LoadOp)
Indices.push_back(Index);
}

@@ -339,25 +371,61 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
return false;

bool BinOpShuffleChanged =
-     replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
+     replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LoadOp);

+ // Check if we extract only the unmasked elements.
+ if (MaskedIndices.any()) {
+   if (any_of(Shuffles, [&](const auto *Shuffle) {
+         ArrayRef<int> ShuffleMask = Shuffle->getShuffleMask();
+         for (int Idx : ShuffleMask) {
+           if (Idx < 0)
+             continue;
+           if (MaskedIndices.test(unsigned(Idx)))

[Review (Member), suggested change]: if (MaskedIndices.test(Idx))

+             return true;
+         }
+         return false;
+       })) {
+     LLVM_DEBUG(dbgs() << "IA: trying to extract a masked element through "
+                       << "shufflevector\n");
+     return false;
+   }
+ }
+ // Check if we extract only the elements within evl.
+ if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadOp)) {
+   uint64_t EVL = cast<ConstantInt>(VPLoad->getArgOperand(2))->getZExtValue();

[Review (Member), suggested change]: int64_t EVL =

+   if (any_of(Shuffles, [&](const auto *Shuffle) {
+         ArrayRef<int> ShuffleMask = Shuffle->getShuffleMask();
+         for (int Idx : ShuffleMask) {
+           if (Idx < 0)
+             continue;
+           if (unsigned(Idx) >= EVL)

[Review (Member), suggested change]: if (Idx >= EVL)

+             return true;
+         }
+         return false;
+       })) {
+     LLVM_DEBUG(
+         dbgs() << "IA: trying to extract an element out of EVL range\n");
+     return false;
+   }
+ }
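For example (again a sketch, with illustrative names): with an EVL of 6, lane 6 is never loaded, so a de-interleave shuffle that reads it trips this check.

```llvm
declare <8 x i32> @llvm.vp.load.v8i32.p0(ptr, <8 x i1>, i32)

define <4 x i32> @evl_bail(ptr %p) {
  ; Only the first 6 lanes are loaded (EVL = 6).
  %v = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p,
           <8 x i1> <i1 true, i1 true, i1 true, i1 true,
                     i1 true, i1 true, i1 true, i1 true>, i32 6)
  ; Shuffle index 6 >= EVL, so lowerInterleavedLoad returns false here.
  %bad = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ret <4 x i32> %bad
}
```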

LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LoadOp << "\n");

// Try to create target specific intrinsics to replace the load and shuffles.
- if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
+ if (!TLI->lowerInterleavedLoad(LoadOp, Shuffles, Indices, Factor)) {
// If Extracts is not empty, tryReplaceExtracts made changes earlier.
return !Extracts.empty() || BinOpShuffleChanged;
}

DeadInsts.insert_range(Shuffles);

- DeadInsts.insert(LI);
+ DeadInsts.insert(LoadOp);
return true;
}

bool InterleavedAccessImpl::replaceBinOpShuffles(
ArrayRef<ShuffleVectorInst *> BinOpShuffles,
-     SmallVectorImpl<ShuffleVectorInst *> &Shuffles, LoadInst *LI) {
+     SmallVectorImpl<ShuffleVectorInst *> &Shuffles, Instruction *LoadOp) {
for (auto *SVI : BinOpShuffles) {
BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0));
Type *BIOp0Ty = BI->getOperand(0)->getType();
@@ -380,9 +448,9 @@ bool InterleavedAccessImpl::replaceBinOpShuffles(
<< "\n With : " << *NewSVI1 << "\n And : "
<< *NewSVI2 << "\n And : " << *NewBI << "\n");
RecursivelyDeleteTriviallyDeadInstructions(SVI);
-   if (NewSVI1->getOperand(0) == LI)
+   if (NewSVI1->getOperand(0) == LoadOp)
      Shuffles.push_back(NewSVI1);
-   if (NewSVI2->getOperand(0) == LI)
+   if (NewSVI2->getOperand(0) == LoadOp)
Shuffles.push_back(NewSVI2);
}

@@ -454,27 +522,79 @@ bool InterleavedAccessImpl::tryReplaceExtracts(
}

bool InterleavedAccessImpl::lowerInterleavedStore(
-     StoreInst *SI, SmallSetVector<Instruction *, 32> &DeadInsts) {
-   if (!SI->isSimple())
-     return false;
+     Instruction *StoreOp, SmallSetVector<Instruction *, 32> &DeadInsts) {
+   Value *StoredValue;
+   if (auto *SI = dyn_cast<StoreInst>(StoreOp)) {
+     if (!SI->isSimple())
+       return false;
+     StoredValue = SI->getValueOperand();
+   } else if (auto *VPStore = dyn_cast<VPIntrinsic>(StoreOp)) {
+     assert(VPStore->getIntrinsicID() == Intrinsic::vp_store);
+     // Require a constant mask and evl.
+     if (!isa<ConstantVector>(VPStore->getArgOperand(2)) ||
+         !isa<ConstantInt>(VPStore->getArgOperand(3)))
+       return false;
+     StoredValue = VPStore->getArgOperand(0);
+   } else {
+     llvm_unreachable("unsupported store operation");
+   }

-   auto *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
+   auto *SVI = dyn_cast<ShuffleVectorInst>(StoredValue);
if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
return false;

+ unsigned NumStoredElements =
+     cast<FixedVectorType>(SVI->getType())->getNumElements();
+ // If this is a vp.store, record its mask (NOT shuffle mask).
+ BitVector MaskedIndices(NumStoredElements);
+ if (auto *VPStore = dyn_cast<VPIntrinsic>(StoreOp)) {
+   auto *Mask = cast<ConstantVector>(VPStore->getArgOperand(2));
+   assert(cast<FixedVectorType>(Mask->getType())->getNumElements() ==
+          NumStoredElements);
+   if (auto *Splat = Mask->getSplatValue()) {
+     // All-zeros mask, bail out early.
+     if (Splat->isZeroValue())
+       return false;
+   } else {
+     for (unsigned i = 0U; i < NumStoredElements; ++i) {

[Review (Member), suggested change]: for (unsigned I : seq<unsigned>(NumStoredElements)) {

+       if (Mask->getAggregateElement(i)->isZeroValue())
+         MaskedIndices.set(i);
+     }
+   }
+ }

// Check if the shufflevector is RE-interleave shuffle.
unsigned Factor;
if (!isReInterleaveMask(SVI, Factor, MaxFactor))
return false;
[Review (Contributor), on the re-interleave check above]: Not for this PR, but just noting that the VP and non-VP paths in this function don't really share much code at all. It's probably justified if you want to split these out into two separate functions. lowerInterleavedLoad might need more refactoring though.

- LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
+ // Check if we store only the unmasked elements.
+ if (MaskedIndices.any()) {
+   if (any_of(SVI->getShuffleMask(), [&](int Idx) {
+         return Idx >= 0 && MaskedIndices.test(unsigned(Idx));

[Review (Member), suggested change]: return Idx >= 0 && MaskedIndices.test(Idx);

+       })) {
+     LLVM_DEBUG(dbgs() << "IA: trying to store a masked element\n");
+     return false;
+   }
+ }
+ // Check if we store only the elements within evl.
+ if (auto *VPStore = dyn_cast<VPIntrinsic>(StoreOp)) {
+   uint64_t EVL = cast<ConstantInt>(VPStore->getArgOperand(3))->getZExtValue();

[Review (Member), suggested change]: int64_t EVL = cast<ConstantInt>(VPStore->getArgOperand(3))->getZExtValue();

+   if (any_of(SVI->getShuffleMask(),
+              [&](int Idx) { return Idx >= 0 && unsigned(Idx) >= EVL; })) {

[Review (Member), suggested change]: [&](int Idx) { return Idx >= 0 && Idx >= EVL; })) {

+     LLVM_DEBUG(dbgs() << "IA: trying to store an element out of EVL range\n");
+     return false;
+   }
+ }
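Putting the store side together, here is a factor-2 vp.store the function can now accept (a sketch; names and values are illustrative, not from the patch):

```llvm
declare void @llvm.vp.store.v8i32.p0(<8 x i32>, ptr, <8 x i1>, i32)

define void @store_ok(<4 x i32> %a, <4 x i32> %b, ptr %q) {
  ; RE-interleave %a and %b: a0 b0 a1 b1 a2 b2 a3 b3.
  %ilv = shufflevector <4 x i32> %a, <4 x i32> %b,
         <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ; Constant all-true mask and full EVL, so every check above passes.
  call void @llvm.vp.store.v8i32.p0(<8 x i32> %ilv, ptr %q,
       <8 x i1> <i1 true, i1 true, i1 true, i1 true,
                 i1 true, i1 true, i1 true, i1 true>, i32 8)
  ret void
}
```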

LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *StoreOp << "\n");

// Try to create target specific intrinsics to replace the store and shuffle.
- if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
+ if (!TLI->lowerInterleavedStore(StoreOp, SVI, Factor))
return false;

// Already have a new target specific interleaved store. Erase the old store.
- DeadInsts.insert(SI);
+ DeadInsts.insert(StoreOp);
DeadInsts.insert(SVI);
return true;
}
@@ -766,12 +886,15 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
SmallSetVector<Instruction *, 32> DeadInsts;
bool Changed = false;

+ using namespace PatternMatch;
for (auto &I : instructions(F)) {
-   if (auto *LI = dyn_cast<LoadInst>(&I))
-     Changed |= lowerInterleavedLoad(LI, DeadInsts);
+   if (match(&I, m_CombineOr(m_Load(m_Value()),
+                             m_Intrinsic<Intrinsic::vp_load>())))
+     Changed |= lowerInterleavedLoad(&I, DeadInsts);

-   if (auto *SI = dyn_cast<StoreInst>(&I))
-     Changed |= lowerInterleavedStore(SI, DeadInsts);
+   if (match(&I, m_CombineOr(m_Store(m_Value(), m_Value()),
+                             m_Intrinsic<Intrinsic::vp_store>())))
+     Changed |= lowerInterleavedStore(&I, DeadInsts);

if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
// At present, we only have intrinsics to represent (de)interleaving
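After this hunk, the scan recognizes four root forms: plain loads and stores plus their predicated vp counterparts. A side-by-side sketch (illustrative names):

```llvm
declare <8 x i32> @llvm.vp.load.v8i32.p0(ptr, <8 x i1>, i32)
declare void @llvm.vp.store.v8i32.p0(<8 x i32>, ptr, <8 x i1>, i32)

define void @roots(ptr %p, ptr %q, <8 x i32> %v, <8 x i1> %m, i32 %n) {
  %x = load <8 x i32>, ptr %p                                               ; matches m_Load
  %y = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p, <8 x i1> %m, i32 %n)   ; matches vp_load
  store <8 x i32> %v, ptr %q                                                ; matches m_Store
  call void @llvm.vp.store.v8i32.p0(<8 x i32> %v, ptr %q, <8 x i1> %m, i32 %n) ; matches vp_store
  ret void
}
```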
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (12 changes: 10 additions & 2 deletions)

@@ -17176,14 +17176,18 @@ static Function *getStructuredStoreFunction(Module *M, unsigned Factor,
/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
bool AArch64TargetLowering::lowerInterleavedLoad(
-     LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+     Instruction *LoadOp, ArrayRef<ShuffleVectorInst *> Shuffles,
ArrayRef<unsigned> Indices, unsigned Factor) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
assert(!Shuffles.empty() && "Empty shufflevector input");
assert(Shuffles.size() == Indices.size() &&
"Unmatched number of shufflevectors and indices");

+ auto *LI = dyn_cast<LoadInst>(LoadOp);
+ if (!LI)
+   return false;

const DataLayout &DL = LI->getDataLayout();

VectorType *VTy = Shuffles[0]->getType();
@@ -17359,13 +17363,17 @@ bool hasNearbyPairedStore(Iter It, Iter End, Value *Ptr, const DataLayout &DL) {
/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
- bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
+ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *StoreOp,
ShuffleVectorInst *SVI,
unsigned Factor) const {

assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");

+ auto *SI = dyn_cast<StoreInst>(StoreOp);
+ if (!SI)
+   return false;

auto *VecTy = cast<FixedVectorType>(SVI->getType());
assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");

llvm/lib/Target/AArch64/AArch64ISelLowering.h (4 changes: 2 additions & 2 deletions)

@@ -716,11 +716,11 @@ class AArch64TargetLowering : public TargetLowering {

unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

- bool lowerInterleavedLoad(LoadInst *LI,
+ bool lowerInterleavedLoad(Instruction *LoadOp,
ArrayRef<ShuffleVectorInst *> Shuffles,
ArrayRef<unsigned> Indices,
unsigned Factor) const override;
- bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
+ bool lowerInterleavedStore(Instruction *StoreOp, ShuffleVectorInst *SVI,
unsigned Factor) const override;

bool lowerDeinterleaveIntrinsicToLoad(