@@ -2863,10 +2863,21 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
28632863 // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
28642864 // must use intrinsics to interleave.
28652865 if (VecTy->isScalableTy ()) {
2866- VectorType *WideVecTy = VectorType::getDoubleElementsVectorType (VecTy);
2867- return Builder.CreateIntrinsic (WideVecTy, Intrinsic::vector_interleave2,
2868- Vals,
2869- /* FMFSource=*/ nullptr , Name);
2866+ assert (isPowerOf2_32 (Factor) && " Unsupported interleave factor for "
2867+ " scalable vectors, must be power of 2" );
2868+ SmallVector<Value *> InterleavingValues (Vals);
2869+ // Each round interleaves pairs of values, halving their number until only
2870+ // the single final interleaved value remains.
2871+ auto *InterleaveTy = cast<VectorType>(InterleavingValues[0 ]->getType ());
2872+ for (unsigned Midpoint = Factor / 2 ; Midpoint > 0 ; Midpoint /= 2 ) {
2873+ InterleaveTy = VectorType::getDoubleElementsVectorType (InterleaveTy);
2874+ for (unsigned I = 0 ; I < Midpoint; ++I)
2875+ InterleavingValues[I] = Builder.CreateIntrinsic (
2876+ InterleaveTy, Intrinsic::vector_interleave2,
2877+ {InterleavingValues[I], InterleavingValues[Midpoint + I]},
2878+ /* FMFSource=*/ nullptr , Name);
2879+ }
2880+ return InterleavingValues[0 ];
28702881 }
28712882
28722883 // Fixed length. Start by concatenating all vectors into a wide vector.
@@ -2952,15 +2963,11 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
29522963 &InterleaveFactor](Value *MaskForGaps) -> Value * {
29532964 if (State.VF .isScalable ()) {
29542965 assert (!MaskForGaps && " Interleaved groups with gaps are not supported." );
2955- assert (InterleaveFactor == 2 &&
2966+ assert (isPowerOf2_32 ( InterleaveFactor) &&
29562967 " Unsupported deinterleave factor for scalable vectors" );
29572968 auto *ResBlockInMask = State.get (BlockInMask);
2958- SmallVector<Value *, 2 > Ops = {ResBlockInMask, ResBlockInMask};
2959- auto *MaskTy = VectorType::get (State.Builder .getInt1Ty (),
2960- State.VF .getKnownMinValue () * 2 , true );
2961- return State.Builder .CreateIntrinsic (
2962- MaskTy, Intrinsic::vector_interleave2, Ops,
2963- /* FMFSource=*/ nullptr , " interleaved.mask" );
2969+ SmallVector<Value *> Ops (InterleaveFactor, ResBlockInMask);
2970+ return interleaveVectors (State.Builder , Ops, " interleaved.mask" );
29642971 }
29652972
29662973 if (!BlockInMask)
@@ -3000,22 +3007,48 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
30003007 ArrayRef<VPValue *> VPDefs = definedValues ();
30013008 const DataLayout &DL = State.CFG .PrevBB ->getDataLayout ();
30023009 if (VecTy->isScalableTy ()) {
3003- assert (InterleaveFactor == 2 &&
3010+ assert (isPowerOf2_32 ( InterleaveFactor) &&
30043011 " Unsupported deinterleave factor for scalable vectors" );
30053012
3006- // Scalable vectors cannot use arbitrary shufflevectors (only splats),
3007- // so must use intrinsics to deinterleave.
3008- Value *DI = State.Builder .CreateIntrinsic (
3009- Intrinsic::vector_deinterleave2, VecTy, NewLoad,
3010- /* FMFSource=*/ nullptr , " strided.vec" );
3011- unsigned J = 0 ;
3012- for (unsigned I = 0 ; I < InterleaveFactor; ++I) {
3013- Instruction *Member = Group->getMember (I);
3013+ // Scalable vectors cannot use arbitrary shufflevectors (only splats),
3014+ // so must use intrinsics to deinterleave.
3015+ SmallVector<Value *> DeinterleavedValues (InterleaveFactor);
3016+ DeinterleavedValues[0 ] = NewLoad;
3017+ // The deinterleave intrinsic currently available only supports a factor
3018+ // of 2, so for InterleaveFactor > 2 we have to deinterleave repeatedly.
3019+ // For InterleaveFactor == 2 the loop below exits after its first
3020+ // iteration.
3021+ // When deinterleaving, the number of values will double until we
3022+ // have "InterleaveFactor".
3023+ for (unsigned NumVectors = 1 ; NumVectors < InterleaveFactor;
3024+ NumVectors *= 2 ) {
3025+ // Deinterleave the elements within the vector
3026+ SmallVector<Value *> TempDeinterleavedValues (NumVectors);
3027+ for (unsigned I = 0 ; I < NumVectors; ++I) {
3028+ auto *DiTy = DeinterleavedValues[I]->getType ();
3029+ TempDeinterleavedValues[I] = State.Builder .CreateIntrinsic (
3030+ Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
3031+ /* FMFSource=*/ nullptr , " strided.vec" );
3032+ }
3033+ // Extract the deinterleaved values:
3034+ for (unsigned I = 0 ; I < 2 ; ++I)
3035+ for (unsigned J = 0 ; J < NumVectors; ++J)
3036+ DeinterleavedValues[NumVectors * I + J] =
3037+ State.Builder .CreateExtractValue (TempDeinterleavedValues[J], I);
3038+ }
30143039
3015- if (!Member)
3040+ #ifndef NDEBUG
3041+ for (Value *Val : DeinterleavedValues)
3042+ assert (Val && " NULL Deinterleaved Value" );
3043+ #endif
3044+ for (unsigned I = 0 , J = 0 ; I < InterleaveFactor; ++I) {
3045+ Instruction *Member = Group->getMember (I);
3046+ Value *StridedVec = DeinterleavedValues[I];
3047+ if (!Member) {
3048+ // The group has no member at this index, so this value is unused; erase it.
3049+ cast<Instruction>(StridedVec)->eraseFromParent ();
30163050 continue ;
3017-
3018- Value *StridedVec = State.Builder .CreateExtractValue (DI, I);
3051+ }
30193052 // If this member has different type, cast the result type.
30203053 if (Member->getType () != ScalarTy) {
30213054 VectorType *OtherVTy = VectorType::get (Member->getType (), State.VF );
0 commit comments