Skip to content
Merged
14 changes: 7 additions & 7 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3523,10 +3523,10 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
if (hasIrregularType(ScalarTy, DL))
return false;

// We currently only know how to emit interleave/deinterleave with
// Factor=2 for scalable vectors. This is purely an implementation
// limit.
if (VF.isScalable() && InterleaveFactor != 2)
// For scalable vectors, the only interleave factor currently supported
// must be power of 2 since we require the (de)interleave2 intrinsics
// instead of shufflevectors.
if (VF.isScalable() && !isPowerOf2_32(InterleaveFactor))
return false;

// If the group involves a non-integral pointer, we may not be able to
Expand Down Expand Up @@ -9159,9 +9159,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
CM.getWideningDecision(IG->getInsertPos(), VF) ==
LoopVectorizationCostModel::CM_Interleave);
// For scalable vectors, the only interleave factor currently supported
// is 2 since we require the (de)interleave2 intrinsics instead of
// shufflevectors.
assert((!Result || !VF.isScalable() || IG->getFactor() == 2) &&
// must be power of 2 since we require the (de)interleave2 intrinsics
// instead of shufflevectors.
assert((!Result || !VF.isScalable() || isPowerOf2_32(IG->getFactor())) &&
"Unsupported interleave factor for scalable vectors");
return Result;
};
Expand Down
78 changes: 55 additions & 23 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2779,10 +2779,19 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
// Scalable vectors cannot use arbitrary shufflevectors (only splats), so
// must use intrinsics to interleave.
if (VecTy->isScalableTy()) {
VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy);
return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
Vals,
/*FMFSource=*/nullptr, Name);
SmallVector<Value *> InterleavingValues(Vals);
// When interleaving, the number of values will be shrunk until we have the
// single final interleaved value.
auto *InterleaveTy = cast<VectorType>(InterleavingValues[0]->getType());
for (unsigned Midpoint = Factor / 2; Midpoint > 0; Midpoint /= 2) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add assertion for confirming Factor is power of 2.

Copy link
Member Author

@hassnaaHamdi hassnaaHamdi Dec 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @Mel-Chen
Thanks for looking at the patch.
The assert statement is already added before calling the interleaveVectors(..) function.

About your questions above:

  1. yes I have landed a patch for adding support to the InterleaveAccessPass to support reading the (de)interleave tree pattern.
  2. Adding them.
  3. If we have (de)interleave3 intrinsics, then we will have to do same logic for recursive (de)interleave3, and then the extra needed work will be representing the interleave factor by multiple of 2 and 3. so for the case of factor 6, we will do single iteration of (de)interleave2 then single iteration of (de)interleave3. The same logic will be applied for all factors that consist of multiples of 2 and 3 only.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @Mel-Chen
Are you satisfied about the latest changes ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The assert statement is already added before calling the interleaveVectors(..) function.

I think we still need an assert before the for loop, as this is a standalone function. This will ensure that no caller inadvertently passes an invalid factor in the future.

InterleaveTy = VectorType::getDoubleElementsVectorType(InterleaveTy);
for (unsigned I = 0; I < Midpoint; ++I)
InterleavingValues[I] = Builder.CreateIntrinsic(
InterleaveTy, Intrinsic::vector_interleave2,
{InterleavingValues[I], InterleavingValues[Midpoint + I]},
/*FMFSource=*/nullptr, Name);
}
return InterleavingValues[0];
}

// Fixed length. Start by concatenating all vectors into a wide vector.
Expand Down Expand Up @@ -2868,15 +2877,11 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
&InterleaveFactor](Value *MaskForGaps) -> Value * {
if (State.VF.isScalable()) {
assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
assert(InterleaveFactor == 2 &&
assert(isPowerOf2_32(InterleaveFactor) &&
"Unsupported deinterleave factor for scalable vectors");
auto *ResBlockInMask = State.get(BlockInMask);
SmallVector<Value *, 2> Ops = {ResBlockInMask, ResBlockInMask};
auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
State.VF.getKnownMinValue() * 2, true);
return State.Builder.CreateIntrinsic(
MaskTy, Intrinsic::vector_interleave2, Ops,
/*FMFSource=*/nullptr, "interleaved.mask");
SmallVector<Value *> Ops(InterleaveFactor, ResBlockInMask);
return interleaveVectors(State.Builder, Ops, "interleaved.mask");
}

if (!BlockInMask)
Expand Down Expand Up @@ -2916,22 +2921,49 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
ArrayRef<VPValue *> VPDefs = definedValues();
const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
if (VecTy->isScalableTy()) {
assert(InterleaveFactor == 2 &&
assert(isPowerOf2_32(InterleaveFactor) &&
"Unsupported deinterleave factor for scalable vectors");

// Scalable vectors cannot use arbitrary shufflevectors (only splats),
// so must use intrinsics to deinterleave.
Value *DI = State.Builder.CreateIntrinsic(
Intrinsic::vector_deinterleave2, VecTy, NewLoad,
/*FMFSource=*/nullptr, "strided.vec");
unsigned J = 0;
for (unsigned I = 0; I < InterleaveFactor; ++I) {
Instruction *Member = Group->getMember(I);
// Scalable vectors cannot use arbitrary shufflevectors (only splats),
// so must use intrinsics to deinterleave.

SmallVector<Value *> DeinterleavedValues(InterleaveFactor);
DeinterleavedValues[0] = NewLoad;
// For the case of InterleaveFactor > 2, we will have to do recursive
// deinterleaving, because the current available deinterleave intrinsic
// supports only Factor of 2, otherwise it will bailout after first
// iteration.
// When deinterleaving, the number of values will double until we
// have "InterleaveFactor".
for (unsigned NumVectors = 1; NumVectors < InterleaveFactor;
NumVectors *= 2) {
// Deinterleave the elements within the vector
SmallVector<Value *> TempDeinterleavedValues(NumVectors);
for (unsigned I = 0; I < NumVectors; ++I) {
auto *DiTy = DeinterleavedValues[I]->getType();
TempDeinterleavedValues[I] = State.Builder.CreateIntrinsic(
Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
/*FMFSource=*/nullptr, "strided.vec");
}
// Extract the deinterleaved values:
for (unsigned I = 0; I < 2; ++I)
for (unsigned J = 0; J < NumVectors; ++J)
DeinterleavedValues[NumVectors * I + J] =
State.Builder.CreateExtractValue(TempDeinterleavedValues[J], I);
}

if (!Member)
#ifndef NDEBUG
for (Value *Val : DeinterleavedValues)
assert(Val && "NULL Deinterleaved Value");
#endif
for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
Instruction *Member = Group->getMember(I);
Value *StridedVec = DeinterleavedValues[I];
if (!Member) {
// This value is not needed as it's not used
static_cast<Instruction *>(StridedVec)->eraseFromParent();
continue;

Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
}
// If this member has different type, cast the result type.
if (Member->getType() != ScalarTy) {
VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,4 @@ define void @negative_deinterleave4_test(ptr %src) {

ret void
}

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please revert this unnecessary change.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove this blank line.

Loading
Loading