@@ -2849,10 +2849,21 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
28492849 // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
28502850 // must use intrinsics to interleave.
28512851 if (VecTy->isScalableTy ()) {
2852- VectorType *WideVecTy = VectorType::getDoubleElementsVectorType (VecTy);
2853- return Builder.CreateIntrinsic (WideVecTy, Intrinsic::vector_interleave2,
2854- Vals,
2855- /* FMFSource=*/ nullptr , Name);
2852+ assert (isPowerOf2_32 (Factor) && " Unsupported interleave factor for "
2853+ " scalable vectors, must be power of 2" );
2854+ SmallVector<Value *> InterleavingValues (Vals);
2855+ // When interleaving, the number of values will be shrunk until we have the
2856+ // single final interleaved value.
2857+ auto *InterleaveTy = cast<VectorType>(InterleavingValues[0 ]->getType ());
2858+ for (unsigned Midpoint = Factor / 2 ; Midpoint > 0 ; Midpoint /= 2 ) {
2859+ InterleaveTy = VectorType::getDoubleElementsVectorType (InterleaveTy);
2860+ for (unsigned I = 0 ; I < Midpoint; ++I)
2861+ InterleavingValues[I] = Builder.CreateIntrinsic (
2862+ InterleaveTy, Intrinsic::vector_interleave2,
2863+ {InterleavingValues[I], InterleavingValues[Midpoint + I]},
2864+ /* FMFSource=*/ nullptr , Name);
2865+ }
2866+ return InterleavingValues[0 ];
28562867 }
28572868
28582869 // Fixed length. Start by concatenating all vectors into a wide vector.
@@ -2938,15 +2949,11 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
29382949 &InterleaveFactor](Value *MaskForGaps) -> Value * {
29392950 if (State.VF .isScalable ()) {
29402951 assert (!MaskForGaps && " Interleaved groups with gaps are not supported." );
2941- assert (InterleaveFactor == 2 &&
2952+ assert (isPowerOf2_32 ( InterleaveFactor) &&
29422953 " Unsupported deinterleave factor for scalable vectors" );
29432954 auto *ResBlockInMask = State.get (BlockInMask);
2944- SmallVector<Value *, 2 > Ops = {ResBlockInMask, ResBlockInMask};
2945- auto *MaskTy = VectorType::get (State.Builder .getInt1Ty (),
2946- State.VF .getKnownMinValue () * 2 , true );
2947- return State.Builder .CreateIntrinsic (
2948- MaskTy, Intrinsic::vector_interleave2, Ops,
2949- /* FMFSource=*/ nullptr , " interleaved.mask" );
2955+ SmallVector<Value *> Ops (InterleaveFactor, ResBlockInMask);
2956+ return interleaveVectors (State.Builder , Ops, " interleaved.mask" );
29502957 }
29512958
29522959 if (!BlockInMask)
@@ -2986,22 +2993,48 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
29862993 ArrayRef<VPValue *> VPDefs = definedValues ();
29872994 const DataLayout &DL = State.CFG .PrevBB ->getDataLayout ();
29882995 if (VecTy->isScalableTy ()) {
2989- assert (InterleaveFactor == 2 &&
2996+ assert (isPowerOf2_32 ( InterleaveFactor) &&
29902997 " Unsupported deinterleave factor for scalable vectors" );
29912998
2992- // Scalable vectors cannot use arbitrary shufflevectors (only splats),
2993- // so must use intrinsics to deinterleave.
2994- Value *DI = State.Builder .CreateIntrinsic (
2995- Intrinsic::vector_deinterleave2, VecTy, NewLoad,
2996- /* FMFSource=*/ nullptr , " strided.vec" );
2997- unsigned J = 0 ;
2998- for (unsigned I = 0 ; I < InterleaveFactor; ++I) {
2999- Instruction *Member = Group->getMember (I);
2999+ // Scalable vectors cannot use arbitrary shufflevectors (only splats),
3000+ // so must use intrinsics to deinterleave.
3001+ SmallVector<Value *> DeinterleavedValues (InterleaveFactor);
3002+ DeinterleavedValues[0 ] = NewLoad;
3003+ // For InterleaveFactor > 2 we have to deinterleave repeatedly, because
3004+ // the currently available deinterleave intrinsic only supports a factor
3005+ // of 2. For a factor of exactly 2 the loop below finishes after its
3006+ // first iteration.
3007+ // Each iteration doubles the number of values, until there are
3008+ // "InterleaveFactor" of them.
3009+ for (unsigned NumVectors = 1 ; NumVectors < InterleaveFactor;
3010+ NumVectors *= 2 ) {
3011+ // Deinterleave the elements within the vector
3012+ SmallVector<Value *> TempDeinterleavedValues (NumVectors);
3013+ for (unsigned I = 0 ; I < NumVectors; ++I) {
3014+ auto *DiTy = DeinterleavedValues[I]->getType ();
3015+ TempDeinterleavedValues[I] = State.Builder .CreateIntrinsic (
3016+ Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
3017+ /* FMFSource=*/ nullptr , " strided.vec" );
3018+ }
3019+ // Extract the deinterleaved values:
3020+ for (unsigned I = 0 ; I < 2 ; ++I)
3021+ for (unsigned J = 0 ; J < NumVectors; ++J)
3022+ DeinterleavedValues[NumVectors * I + J] =
3023+ State.Builder .CreateExtractValue (TempDeinterleavedValues[J], I);
3024+ }
30003025
3001- if (!Member)
3026+ #ifndef NDEBUG
3027+ for (Value *Val : DeinterleavedValues)
3028+ assert (Val && " NULL Deinterleaved Value" );
3029+ #endif
3030+ for (unsigned I = 0 , J = 0 ; I < InterleaveFactor; ++I) {
3031+ Instruction *Member = Group->getMember (I);
3032+ Value *StridedVec = DeinterleavedValues[I];
3033+ if (!Member) {
3034+ // This value is not needed as it's not used
3035+ static_cast <Instruction *>(StridedVec)->eraseFromParent ();
30023036 continue ;
3003-
3004- Value *StridedVec = State.Builder .CreateExtractValue (DI, I);
3037+ }
30053038 // If this member has different type, cast the result type.
30063039 if (Member->getType () != ScalarTy) {
30073040 VectorType *OtherVTy = VectorType::get (Member->getType (), State.VF );
0 commit comments