Skip to content
20 changes: 20 additions & 0 deletions llvm/include/llvm/Analysis/VectorUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "llvm/Support/Compiler.h"

namespace llvm {
class IntrinsicInst;
class TargetLibraryInfo;

/// The Vector Function Database.
Expand Down Expand Up @@ -188,6 +189,25 @@ LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID);
/// Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID);

/// A vector can either be deinterleaved through an intrinsic or a combination
/// of shufflevector instructions. This is a thin abstraction layer to provide
/// some common information like the deinterleaving factor.
struct VectorDeinterleaving {
Copy link
Member Author

@mshockwave mshockwave Jul 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to make this wrapper as simple as possible. I'm open to making it a union + tag or even a std::variant.

/// The deinterleave intrinsic, when this object was built from one; stays
/// null when the object was built from a list of values instead.
IntrinsicInst *DI = nullptr;
/// The deinterleaved values, one slot per field, when this object was built
/// from a list of values; empty otherwise. ArrayRef does not own its
/// elements, so the storage behind it must outlive this object. Slots may
/// hold null entries (getDeinterleavedType() skips them).
ArrayRef<Value *> Values;

/// Returns the deinterleaving factor: the factor associated with DI's
/// intrinsic ID when DI is set, otherwise the number of slots in Values.
unsigned getFactor() const;

/// Returns the type of one deinterleaved field: the first contained type of
/// DI's result type when DI is set, otherwise the type of the first non-null
/// entry in Values.
Type *getDeinterleavedType() const;

/// Wraps a deinterleave intrinsic call. Values is left empty.
explicit VectorDeinterleaving(IntrinsicInst *DI) : DI(DI) {}
/// Wraps a list of deinterleaved values. DI is left null.
explicit VectorDeinterleaving(ArrayRef<Value *> Values) : Values(Values) {}

// Only compiled in when NDEBUG is not defined or LLVM_ENABLE_DUMP is
// defined, so it must only be referenced from code under the same
// conditions (e.g. inside assert()).
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// True when exactly one of the two representations is populated: either DI
/// is non-null or Values is non-empty, but not both and not neither.
bool isValid() const { return (DI != nullptr) ^ !Values.empty(); }
#endif
};

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Expand Down
13 changes: 7 additions & 6 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class TargetRegisterClass;
class TargetRegisterInfo;
class TargetTransformInfo;
class Value;
struct VectorDeinterleaving;
class VPIntrinsic;

namespace Sched {
Expand Down Expand Up @@ -3228,9 +3229,10 @@ class LLVM_ABI TargetLoweringBase {
///
/// \p Load is a vp.load instruction.
/// \p Mask is a mask value
/// \p DeinterleaveRes is a list of deinterleaved results.
/// \p VD represents either a deinterleave intrinsic or a list of
/// deinterleaved values.
virtual bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
ArrayRef<Value *> DeinterleaveRes) const {
const VectorDeinterleaving &VD) const {
return false;
}

Expand All @@ -3250,10 +3252,9 @@ class LLVM_ABI TargetLoweringBase {
/// llvm.vector.deinterleave{2,3,5,7}
///
/// \p LI is the accompanying load instruction.
/// \p DeinterleaveValues contains the deinterleaved values.
virtual bool
lowerDeinterleaveIntrinsicToLoad(LoadInst *LI,
ArrayRef<Value *> DeinterleaveValues) const {
/// \p DI represents the deinterleave intrinsic.
virtual bool lowerDeinterleaveIntrinsicToLoad(LoadInst *LI,
IntrinsicInst *DI) const {
return false;
}

Expand Down
18 changes: 18 additions & 0 deletions llvm/lib/Analysis/VectorUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,24 @@ unsigned llvm::getDeinterleaveIntrinsicFactor(Intrinsic::ID ID) {
}
}

/// Returns the deinterleaving factor of this wrapper.
///
/// For the value-list form this is the number of slots in Values; for the
/// intrinsic form it is whatever getDeinterleaveIntrinsicFactor reports for
/// DI's intrinsic ID.
unsigned VectorDeinterleaving::getFactor() const {
  assert(isValid());

  // Value-list form: one slot per deinterleaved field.
  if (!DI)
    return Values.size();

  // Intrinsic form: the factor is determined by the intrinsic ID.
  return getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
}

/// Returns the type of a single deinterleaved field.
///
/// For the intrinsic form this is the first contained type of DI's result
/// type. For the value-list form it is the type of the first non-null entry
/// in Values; at least one entry must be non-null.
Type *VectorDeinterleaving::getDeinterleavedType() const {
  assert(getFactor() > 0 && "unexpected deinterleaving factor");
  if (DI)
    return *DI->getType()->subtype_begin();

  // Slots in Values may be null, so look for the first populated one. A
  // non-zero factor only proves Values is non-empty, not that any slot is
  // populated, so check the search result before dereferencing it.
  auto FirstActive =
      llvm::find_if(Values, [](Value *V) { return V != nullptr; });
  assert(FirstActive != Values.end() &&
         "expected at least one non-null deinterleaved value");
  return (*FirstActive)->getType();
}

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Expand Down
32 changes: 8 additions & 24 deletions llvm/lib/CodeGen/InterleavedAccessPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,8 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
SmallVector<Value *, 4> ShuffleValues(Factor, nullptr);
for (auto [Idx, ShuffleMaskIdx] : enumerate(Indices))
ShuffleValues[ShuffleMaskIdx] = Shuffles[Idx];
if (!TLI->lowerInterleavedVPLoad(VPLoad, LaneMask, ShuffleValues))
VectorDeinterleaving VD(ShuffleValues);
if (!TLI->lowerInterleavedVPLoad(VPLoad, LaneMask, VD))
// If Extracts is not empty, tryReplaceExtracts made changes earlier.
return !Extracts.empty() || BinOpShuffleChanged;
} else {
Expand Down Expand Up @@ -615,32 +616,17 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
if (!LoadedVal->hasOneUse() || !isa<LoadInst, VPIntrinsic>(LoadedVal))
return false;

const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
VectorDeinterleaving VD(DI);
const unsigned Factor = VD.getFactor();
assert(Factor && "unexpected deinterleave intrinsic");

SmallVector<Value *, 8> DeinterleaveValues(Factor, nullptr);
Value *LastFactor = nullptr;
for (auto *User : DI->users()) {
auto *Extract = dyn_cast<ExtractValueInst>(User);
if (!Extract || Extract->getNumIndices() != 1)
return false;
unsigned Idx = Extract->getIndices()[0];
if (DeinterleaveValues[Idx])
return false;
DeinterleaveValues[Idx] = Extract;
LastFactor = Extract;
}

if (!LastFactor)
return false;

if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
if (VPLoad->getIntrinsicID() != Intrinsic::vp_load)
return false;
// Check mask operand. Handle both all-true/false and interleaved mask.
Value *WideMask = VPLoad->getOperand(1);
Value *Mask =
getMask(WideMask, Factor, cast<VectorType>(LastFactor->getType()));
getMask(WideMask, Factor, cast<VectorType>(VD.getDeinterleavedType()));
if (!Mask)
return false;

Expand All @@ -649,7 +635,8 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(

// Since lowerInterleaveLoad expects Shuffles and LoadInst, use special
// TLI function to emit target-specific interleaved instruction.
if (!TLI->lowerInterleavedVPLoad(VPLoad, Mask, DeinterleaveValues))
VectorDeinterleaving VD(DI);
if (!TLI->lowerInterleavedVPLoad(VPLoad, Mask, VD))
return false;

} else {
Expand All @@ -661,13 +648,10 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
<< " and factor = " << Factor << "\n");

// Try and match this with target specific intrinsics.
if (!TLI->lowerDeinterleaveIntrinsicToLoad(LI, DeinterleaveValues))
if (!TLI->lowerDeinterleaveIntrinsicToLoad(LI, DI))
return false;
}

for (Value *V : DeinterleaveValues)
if (V)
DeadInsts.insert(cast<Instruction>(V));
DeadInsts.insert(DI);
// We now have a target-specific load, so delete the old one.
DeadInsts.insert(cast<Instruction>(LoadedVal));
Expand Down
31 changes: 13 additions & 18 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17476,16 +17476,15 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
}

bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
LoadInst *LI, ArrayRef<Value *> DeinterleavedValues) const {
unsigned Factor = DeinterleavedValues.size();
LoadInst *LI, IntrinsicInst *DI) const {
VectorDeinterleaving VD(DI);
const unsigned Factor = VD.getFactor();
if (Factor != 2 && Factor != 4) {
LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n");
return false;
}

Value *FirstActive = *llvm::find_if(DeinterleavedValues,
[](Value *V) { return V != nullptr; });
VectorType *VTy = cast<VectorType>(FirstActive->getType());
VectorType *VTy = cast<VectorType>(VD.getDeinterleavedType());

const DataLayout &DL = LI->getModule()->getDataLayout();
bool UseScalable;
Expand Down Expand Up @@ -17513,6 +17512,7 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
Builder.CreateVectorSplat(LdTy->getElementCount(), Builder.getTrue());

Value *BaseAddr = LI->getPointerOperand();
Value *Result = nullptr;
if (NumLoads > 1) {
// Create multiple legal small ldN.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move this next to the insertvalue loop?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

SmallVector<Value *, 4> ExtractedLdValues(Factor, PoisonValue::get(VTy));
Expand All @@ -17533,25 +17533,20 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
}
LLVM_DEBUG(dbgs() << "LdN4 res: "; LdN->dump());
}
// Replace output of deinterleave2 intrinsic by output of ldN2/ldN4
for (unsigned J = 0; J < Factor; ++J) {
if (DeinterleavedValues[J])
DeinterleavedValues[J]->replaceAllUsesWith(ExtractedLdValues[J]);
}

// Merge the values from different factors.
Result = PoisonValue::get(DI->getType());
for (unsigned J = 0; J < Factor; ++J)
Result = Builder.CreateInsertValue(Result, ExtractedLdValues[J], J);
} else {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This extra insertvalue shouldn't affect codegen. @hassnaaHamdi do you think AArch64 also needs some codegen tests for InterleavedAccess?

Value *Result;
if (UseScalable)
Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN");
else
Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
// Replace output of deinterleave2 intrinsic by output of ldN2/ldN4
for (unsigned I = 0; I < Factor; I++) {
if (DeinterleavedValues[I]) {
Value *NewExtract = Builder.CreateExtractValue(Result, I);
DeinterleavedValues[I]->replaceAllUsesWith(NewExtract);
}
}
}

// Replace output of deinterleave2 intrinsic by output of ldN2/ldN4
DI->replaceAllUsesWith(Result);
return true;
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,8 @@ class AArch64TargetLowering : public TargetLowering {
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;

bool lowerDeinterleaveIntrinsicToLoad(
LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const override;
bool lowerDeinterleaveIntrinsicToLoad(LoadInst *LI,
IntrinsicInst *DI) const override;

bool lowerInterleaveIntrinsicToStore(
StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/RISCV/RISCVISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -437,14 +437,14 @@ class RISCVTargetLowering : public TargetLowering {
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;

bool lowerDeinterleaveIntrinsicToLoad(
LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const override;
bool lowerDeinterleaveIntrinsicToLoad(LoadInst *LI,
IntrinsicInst *DI) const override;

bool lowerInterleaveIntrinsicToStore(
StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;

bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
ArrayRef<Value *> DeinterleaveRes) const override;
const VectorDeinterleaving &VD) const override;

bool lowerInterleavedVPStore(VPIntrinsic *Store, Value *Mask,
ArrayRef<Value *> InterleaveOps) const override;
Expand Down
52 changes: 23 additions & 29 deletions llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "RISCVISelLowering.h"
#include "RISCVSubtarget.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
Expand Down Expand Up @@ -233,17 +234,17 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
}

bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const {
const unsigned Factor = DeinterleaveValues.size();
LoadInst *LI, IntrinsicInst *DI) const {
VectorDeinterleaving VD(DI);
const unsigned Factor = VD.getFactor();
assert(Factor && "unexpected deinterleaving factor");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Redundant assert, already checked in callee.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed

if (Factor > 8)
return false;

assert(LI->isSimple());
IRBuilder<> Builder(LI);

Value *FirstActive =
*llvm::find_if(DeinterleaveValues, [](Value *V) { return V != nullptr; });
VectorType *ResVTy = cast<VectorType>(FirstActive->getType());
VectorType *ResVTy = cast<VectorType>(VD.getDeinterleavedType());

const DataLayout &DL = LI->getDataLayout();

Expand Down Expand Up @@ -293,16 +294,7 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
}
}

for (auto [Idx, DIV] : enumerate(DeinterleaveValues)) {
if (!DIV)
continue;
// We have to create a brand new ExtractValue to replace each
// of these old ExtractValue instructions.
Value *NewEV =
Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
DIV->replaceAllUsesWith(NewEV);
}

DI->replaceAllUsesWith(Return);
return true;
}

Expand Down Expand Up @@ -419,16 +411,14 @@ static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
/// dealing with factor of 2 (extractvalue is still required for most of other
/// factors though).
bool RISCVTargetLowering::lowerInterleavedVPLoad(
VPIntrinsic *Load, Value *Mask,
ArrayRef<Value *> DeinterleaveResults) const {
const unsigned Factor = DeinterleaveResults.size();
VPIntrinsic *Load, Value *Mask, const VectorDeinterleaving &VD) const {
assert(Mask && "Expect a valid mask");
assert(Load->getIntrinsicID() == Intrinsic::vp_load &&
"Unexpected intrinsic");

Value *FirstActive = *llvm::find_if(DeinterleaveResults,
[](Value *V) { return V != nullptr; });
VectorType *VTy = cast<VectorType>(FirstActive->getType());
const unsigned Factor = VD.getFactor();
assert(Factor && "unexpected vector deinterleaving");
VectorType *VTy = cast<VectorType>(VD.getDeinterleavedType());

auto &DL = Load->getModule()->getDataLayout();
Align Alignment = Load->getParamAlign(0).value_or(
Expand Down Expand Up @@ -494,14 +484,18 @@ bool RISCVTargetLowering::lowerInterleavedVPLoad(
}
}

for (auto [Idx, DIO] : enumerate(DeinterleaveResults)) {
if (!DIO)
continue;
// We have to create a brand new ExtractValue to replace each
// of these old ExtractValue instructions.
Value *NewEV =
Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
DIO->replaceAllUsesWith(NewEV);
if (VD.DI) {
VD.DI->replaceAllUsesWith(Return);
} else {
for (auto [Idx, DIO] : enumerate(VD.Values)) {
if (!DIO)
continue;
// We have to create a brand new ExtractValue to replace each
// of these old ExtractValue instructions.
Value *NewEV =
Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
DIO->replaceAllUsesWith(NewEV);
}
}

return true;
Expand Down
Loading