@@ -2789,21 +2789,10 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
27892789 // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
27902790 // must use intrinsics to interleave.
27912791 if (VecTy->isScalableTy ()) {
2792- assert (isPowerOf2_32 (Factor) && " Unsupported interleave factor for "
2793- " scalable vectors, must be power of 2" );
2794- SmallVector<Value *> InterleavingValues (Vals);
2795- // When interleaving, the number of values will be shrunk until we have the
2796- // single final interleaved value.
2797- auto *InterleaveTy = cast<VectorType>(InterleavingValues[0 ]->getType ());
2798- for (unsigned Midpoint = Factor / 2 ; Midpoint > 0 ; Midpoint /= 2 ) {
2799- InterleaveTy = VectorType::getDoubleElementsVectorType (InterleaveTy);
2800- for (unsigned I = 0 ; I < Midpoint; ++I)
2801- InterleavingValues[I] = Builder.CreateIntrinsic (
2802- InterleaveTy, Intrinsic::vector_interleave2,
2803- {InterleavingValues[I], InterleavingValues[Midpoint + I]},
2804- /* FMFSource=*/ nullptr , Name);
2805- }
2806- return InterleavingValues[0 ];
2792+ VectorType *WideVecTy = VectorType::getDoubleElementsVectorType (VecTy);
2793+ return Builder.CreateIntrinsic (WideVecTy, Intrinsic::vector_interleave2,
2794+ Vals,
2795+ /* FMFSource=*/ nullptr , Name);
28072796 }
28082797
28092798 // Fixed length. Start by concatenating all vectors into a wide vector.
@@ -2889,11 +2878,15 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
28892878 &InterleaveFactor](Value *MaskForGaps) -> Value * {
28902879 if (State.VF .isScalable ()) {
28912880 assert (!MaskForGaps && " Interleaved groups with gaps are not supported." );
2892- assert (isPowerOf2_32 ( InterleaveFactor) &&
2881+ assert (InterleaveFactor == 2 &&
28932882 " Unsupported deinterleave factor for scalable vectors" );
28942883 auto *ResBlockInMask = State.get (BlockInMask);
2895- SmallVector<Value *> Ops (InterleaveFactor, ResBlockInMask);
2896- return interleaveVectors (State.Builder , Ops, " interleaved.mask" );
2884+ SmallVector<Value *, 2 > Ops = {ResBlockInMask, ResBlockInMask};
2885+ auto *MaskTy = VectorType::get (State.Builder .getInt1Ty (),
2886+ State.VF .getKnownMinValue () * 2 , true );
2887+ return State.Builder .CreateIntrinsic (
2888+ MaskTy, Intrinsic::vector_interleave2, Ops,
2889+ /* FMFSource=*/ nullptr , " interleaved.mask" );
28972890 }
28982891
28992892 if (!BlockInMask)
@@ -2933,48 +2926,22 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
29332926 ArrayRef<VPValue *> VPDefs = definedValues ();
29342927 const DataLayout &DL = State.CFG .PrevBB ->getDataLayout ();
29352928 if (VecTy->isScalableTy ()) {
2936- assert (isPowerOf2_32 ( InterleaveFactor) &&
2929+ assert (InterleaveFactor == 2 &&
29372930 " Unsupported deinterleave factor for scalable vectors" );
29382931
2939- // Scalable vectors cannot use arbitrary shufflevectors (only splats),
2940- // so must use intrinsics to deinterleave.
2941- SmallVector<Value *> DeinterleavedValues (InterleaveFactor);
2942- DeinterleavedValues[0 ] = NewLoad;
2943- // For the case of InterleaveFactor > 2, we will have to do recursive
2944- // deinterleaving, because the current available deinterleave intrinsic
2945- // supports only Factor of 2, otherwise it will bailout after first
2946- // iteration.
2947- // When deinterleaving, the number of values will double until we
2948- // have "InterleaveFactor".
2949- for (unsigned NumVectors = 1 ; NumVectors < InterleaveFactor;
2950- NumVectors *= 2 ) {
2951- // Deinterleave the elements within the vector
2952- SmallVector<Value *> TempDeinterleavedValues (NumVectors);
2953- for (unsigned I = 0 ; I < NumVectors; ++I) {
2954- auto *DiTy = DeinterleavedValues[I]->getType ();
2955- TempDeinterleavedValues[I] = State.Builder .CreateIntrinsic (
2956- Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
2957- /* FMFSource=*/ nullptr , " strided.vec" );
2958- }
2959- // Extract the deinterleaved values:
2960- for (unsigned I = 0 ; I < 2 ; ++I)
2961- for (unsigned J = 0 ; J < NumVectors; ++J)
2962- DeinterleavedValues[NumVectors * I + J] =
2963- State.Builder .CreateExtractValue (TempDeinterleavedValues[J], I);
2964- }
2965-
2966- #ifndef NDEBUG
2967- for (Value *Val : DeinterleavedValues)
2968- assert (Val && " NULL Deinterleaved Value" );
2969- #endif
2970- for (unsigned I = 0 , J = 0 ; I < InterleaveFactor; ++I) {
2932+ // Scalable vectors cannot use arbitrary shufflevectors (only splats),
2933+ // so must use intrinsics to deinterleave.
2934+ Value *DI = State.Builder .CreateIntrinsic (
2935+ Intrinsic::vector_deinterleave2, VecTy, NewLoad,
2936+ /* FMFSource=*/ nullptr , " strided.vec" );
2937+ unsigned J = 0 ;
2938+ for (unsigned I = 0 ; I < InterleaveFactor; ++I) {
29712939 Instruction *Member = Group->getMember (I);
2972- Value *StridedVec = DeinterleavedValues[I];
2973- if (!Member) {
2974- // This value is not needed as it's not used
2975- static_cast <Instruction *>(StridedVec)->eraseFromParent ();
2940+
2941+ if (!Member)
29762942 continue ;
2977- }
2943+
2944+ Value *StridedVec = State.Builder .CreateExtractValue (DI, I);
29782945 // If this member has different type, cast the result type.
29792946 if (Member->getType () != ScalarTy) {
29802947 VectorType *OtherVTy = VectorType::get (Member->getType (), State.VF );
0 commit comments