Merged
29 changes: 29 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
@@ -94,6 +94,7 @@ class TargetRegisterClass;
class TargetRegisterInfo;
class TargetTransformInfo;
class Value;
class VPIntrinsic;

namespace Sched {

@@ -3152,6 +3153,34 @@ class TargetLoweringBase {
return false;
}

/// Lower an interleaved load backed by a vp.load intrinsic to
/// target-specific intrinsics. Return true on success.
///
/// \p Load is the vp.load instruction.
/// \p Mask is the deinterleaved mask, or nullptr for an all-true mask.
/// \p DeinterleaveIntrin is the vector.deinterleave intrinsic.
/// \p DeinterleaveRes is the list of deinterleaved results.
virtual bool
lowerInterleavedScalableLoad(VPIntrinsic *Load, Value *Mask,
Contributor:

If this doesn't have to be strictly restricted to scalable vectors, would it make sense to rename this to something like lowerDeinterleaveIntrinsicToVPLoad to more closely match the lowerDeinterleaveIntrinsicToLoad hook?

Member Author:

Yeah, lowerDeinterleaveIntrinsicToVPLoad is more consistent with the existing naming. I've updated it.

Member Author:

I also updated the list of arguments taken by lowerDeinterleaveIntrinsicToVPLoad / lowerInterleaveIntrinsicToVPStore; they no longer take the (de)interleave intrinsic argument (because we weren't really using it).

IntrinsicInst *DeinterleaveIntrin,
ArrayRef<Value *> DeinterleaveRes) const {
return false;
}

/// Lower an interleaved store backed by a vp.store intrinsic to
/// target-specific intrinsics. Return true on success.
///
/// \p Store is the vp.store instruction.
/// \p Mask is the interleaved mask, or nullptr for an all-true mask.
/// \p InterleaveIntrin is the vector.interleave intrinsic.
/// \p InterleaveOps is the list of values being interleaved.
virtual bool
lowerInterleavedScalableStore(VPIntrinsic *Store, Value *Mask,
IntrinsicInst *InterleaveIntrin,
ArrayRef<Value *> InterleaveOps) const {
return false;
}

/// Lower a deinterleave intrinsic to a target specific load intrinsic.
/// Return true on success. Currently only supports
/// llvm.vector.deinterleave2
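For illustration, here is a minimal sketch of the IR shape the new load hook is meant to receive; the types and value names are assumptions, not part of this patch. A masked vp.load's only use is an llvm.vector.deinterleave2 whose results become the hook's DeinterleaveRes; a target can then replace the pair with a segment load (e.g. vlseg2 on RISC-V). The mirrored vector.interleave2 + vp.store shape for the store hook is sketched at the end of this diff.

```llvm
declare <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr, <vscale x 8 x i1>, i32)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32>)

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @load_factor2(ptr %p, <vscale x 8 x i1> %wide.mask, i32 %evl) {
  ; Wide EVL load; the mask is operand 1, which getMask() inspects below.
  %wide = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %p, <vscale x 8 x i1> %wide.mask, i32 %evl)
  ; Factor-2 deinterleave of the wide value; its two fields are the
  ; deinterleaved results handed to the TLI hook.
  %deint = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %wide)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %deint
}
```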
110 changes: 91 additions & 19 deletions llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -630,11 +630,34 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
return true;
}

/// Check the interleaved mask.
///
/// - if the optional contains a non-null value, that value is the
///   deinterleaved mask
/// - if the optional contains a null value, the mask is all-true
/// - returns std::nullopt if the mask cannot be deinterleaved
static std::optional<Value *> getMask(Value *WideMask, unsigned Factor) {
using namespace llvm::PatternMatch;
if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
SmallVector<Value *, 8> Operands;
SmallVector<Instruction *, 8> DeadInsts;
if (getVectorInterleaveFactor(IMI, Operands, DeadInsts)) {
assert(!Operands.empty());
if (Operands.size() == Factor && llvm::all_equal(Operands))
return Operands.front();
}
}
if (match(WideMask, m_AllOnes()))
return nullptr;
return std::nullopt;
}
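To make getMask's contract concrete, a small factor-2 sketch follows; the types and names are illustrative assumptions. A wide mask built by interleaving the same value with itself deinterleaves back to that value, a constant all-ones mask yields the null ("all-true") result, and anything else returns std::nullopt, which blocks the transform.

```llvm
declare <vscale x 8 x i1> @llvm.vector.interleave2.nxv8i1(<vscale x 4 x i1>, <vscale x 4 x i1>)

; getMask(%wide.mask, /*Factor=*/2) would return %m: both interleaved
; fields are the same value.
define <vscale x 8 x i1> @interleaved_mask(<vscale x 4 x i1> %m) {
  %wide.mask = call <vscale x 8 x i1> @llvm.vector.interleave2.nxv8i1(<vscale x 4 x i1> %m, <vscale x 4 x i1> %m)
  ret <vscale x 8 x i1> %wide.mask
}

; Passing a constant splat (i1 true) as the vp.load/vp.store mask instead
; would make getMask return nullptr, since m_AllOnes() matches it.
```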

bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
IntrinsicInst *DI, SmallSetVector<Instruction *, 32> &DeadInsts) {
Value *LoadedVal = DI->getOperand(0);
if (!LoadedVal->hasOneUse() || !isa<LoadInst, VPIntrinsic>(LoadedVal))
return false;

SmallVector<Value *, 8> DeinterleaveValues;
Expand All @@ -643,43 +666,92 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
DeinterleaveDeadInsts))
return false;

LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI
<< " with factor = " << DeinterleaveValues.size() << "\n");
const unsigned Factor = DeinterleaveValues.size();

// Try and match this with target specific intrinsics.
if (!TLI->lowerDeinterleaveIntrinsicToLoad(LI, DeinterleaveValues))
return false;
if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
if (VPLoad->getIntrinsicID() != Intrinsic::vp_load)
return false;
// Check mask operand. Handle both all-true and interleaved mask.
Value *WideMask = VPLoad->getOperand(1);
std::optional<Value *> Mask = getMask(WideMask, Factor);
if (!Mask)
return false;

LLVM_DEBUG(dbgs() << "IA: Found a vp.load with deinterleave intrinsic "
<< *DI << " and factor = " << Factor << "\n");

// Since lowerInterleavedLoad expects Shuffles and a LoadInst, use a special
// TLI hook to emit a target-specific interleaved instruction.
if (!TLI->lowerInterleavedScalableLoad(VPLoad, *Mask, DI,
DeinterleaveValues))
Contributor:

Is there anything preventing this from being called with a fixed length vector? Or why do we need to restrict it to scalable vectors in the first place?

It looks like RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad already handles fixed length vectors.

I don't think we need to handle fixed length vectors in this PR necessarily; we could just check for them in the TLI hook and return false.

Collaborator:

The vectorizer shouldn't generate deinterleave/interleave intrinsics for fixed vectors, but maybe we should guard against handwritten IR?

Member Author:

Done. Now RISCV's TLI implementation checks whether it's a scalable vector or not.

return false;

} else {
auto *LI = cast<LoadInst>(LoadedVal);
if (!LI->isSimple())
return false;

LLVM_DEBUG(dbgs() << "IA: Found a load with deinterleave intrinsic " << *DI
<< " and factor = " << Factor << "\n");

// Try and match this with target specific intrinsics.
if (!TLI->lowerDeinterleaveIntrinsicToLoad(LI, DeinterleaveValues))
return false;
}

DeadInsts.insert(DeinterleaveDeadInsts.begin(), DeinterleaveDeadInsts.end());
// We now have a target-specific load, so delete the old one.
DeadInsts.insert(cast<Instruction>(LoadedVal));
return true;
}

bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
IntrinsicInst *II, SmallSetVector<Instruction *, 32> &DeadInsts) {
if (!II->hasOneUse())
return false;

Value *StoredBy = II->user_back();
if (!isa<StoreInst, VPIntrinsic>(StoredBy))
return false;

SmallVector<Value *, 8> InterleaveValues;
SmallVector<Instruction *, 8> InterleaveDeadInsts;
if (!getVectorInterleaveFactor(II, InterleaveValues, InterleaveDeadInsts))
return false;

LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II
<< " with factor = " << InterleaveValues.size() << "\n");
const unsigned Factor = InterleaveValues.size();

// Try and match this with target specific intrinsics.
if (!TLI->lowerInterleaveIntrinsicToStore(SI, InterleaveValues))
return false;
if (auto *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) {
if (VPStore->getIntrinsicID() != Intrinsic::vp_store)
return false;

Value *WideMask = VPStore->getOperand(2);
std::optional<Value *> Mask = getMask(WideMask, Factor);
if (!Mask)
return false;

LLVM_DEBUG(dbgs() << "IA: Found a vp.store with interleave intrinsic "
<< *II << " and factor = " << Factor << "\n");

// Since lowerInterleavedStore expects Shuffles and a StoreInst, use a
// special TLI hook to emit a target-specific interleaved instruction.
if (!TLI->lowerInterleavedScalableStore(VPStore, *Mask, II,
InterleaveValues))
return false;
} else {
auto *SI = cast<StoreInst>(StoredBy);
if (!SI->isSimple())
return false;

LLVM_DEBUG(dbgs() << "IA: Found a store with interleave intrinsic " << *II
<< " and factor = " << Factor << "\n");

// Try and match this with target specific intrinsics.
if (!TLI->lowerInterleaveIntrinsicToStore(SI, InterleaveValues))
return false;
}

// We now have a target-specific store, so delete the old one.
DeadInsts.insert(cast<Instruction>(StoredBy));
DeadInsts.insert(InterleaveDeadInsts.begin(), InterleaveDeadInsts.end());
return true;
}
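And the mirrored store-side shape, again as a sketch with assumed types and names: a vector.interleave2 whose single user is a vp.store, with the mask at operand 2 (matching the getOperand(2) above). This is the pattern a target such as RISC-V can turn into a segment store (e.g. vsseg2).

```llvm
declare <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32>, ptr, <vscale x 8 x i1>, i32)

define void @store_factor2(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p, <vscale x 8 x i1> %wide.mask, i32 %evl) {
  ; The interleave's only use is the vp.store, so lowerInterleaveIntrinsic
  ; can fire and hand both off to the TLI hook.
  %wide = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %wide, ptr %p, <vscale x 8 x i1> %wide.mask, i32 %evl)
  ret void
}
```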