@@ -343,50 +343,26 @@ RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty, TTI::TargetCostKind CostKind) {
343343 /* AddressSpace=*/ 0 , CostKind);
344344}
345345
346- InstructionCost
347- RISCVTTIImpl::isMultipleInsertSubvector (VectorType *Tp, ArrayRef<int > Mask,
348- TTI::TargetCostKind CostKind) {
349- if (!isa<FixedVectorType>(Tp))
350- return InstructionCost::getInvalid ();
351- std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost (Tp);
352- if (LT.second .getScalarSizeInBits () == 1 )
353- return InstructionCost::getInvalid ();
346+ static bool isRepeatedConcatMaskImpl (ArrayRef<int > Mask, int &SubVectorSize) {
354347 unsigned Size = Mask.size ();
355348 if (!isPowerOf2_32 (Size))
356- return InstructionCost::getInvalid ();
357- // Try to guess subvector size.
358- unsigned SubVecSize;
349+ return false ;
359350 for (unsigned I = 0 ; I != Size; ++I) {
360351 if (static_cast <unsigned >(Mask[I]) == I)
361352 continue ;
362- if (Mask[I] == 0 ) {
363- SubVecSize = I;
364- break ;
365- }
366- return InstructionCost::getInvalid ();
367- }
368- if (Size % SubVecSize != 0 )
369- return InstructionCost::getInvalid ();
370- for (unsigned I = 0 ; I != Size; ++I)
371- if (static_cast <unsigned >(Mask[I]) != I % SubVecSize)
372- return InstructionCost::getInvalid ();
373- InstructionCost Cost = 0 ;
374- unsigned NumSlides = Log2_32 (Size / SubVecSize);
375- // The cost of extraction from a subvector is 0 if the index is 0.
376- for (unsigned I = 0 ; I != NumSlides; ++I) {
377- unsigned InsertIndex = SubVecSize * (1 << I);
378- FixedVectorType *SubTp =
379- FixedVectorType::get (Tp->getElementType (), InsertIndex);
380- FixedVectorType *DestTp =
381- FixedVectorType::getDoubleElementsVectorType (SubTp);
382- std::pair<InstructionCost, MVT> DestLT = getTypeLegalizationCost (DestTp);
383- // Add the cost of whole vector register move because the destination vector
384- // register group for vslideup cannot overlap the source.
385- Cost += DestLT.first * TLI->getLMULCost (DestLT.second );
386- Cost += getShuffleCost (TTI::SK_InsertSubvector, DestTp, {}, CostKind,
387- InsertIndex, SubTp);
353+ if (Mask[I] != 0 )
354+ return false ;
355+ if (Size % I != 0 )
356+ return false ;
357+ for (unsigned J = 0 ; J != Size; ++J)
358+ // Check that the leading pattern repeats across the whole mask.
359+ if (static_cast <unsigned >(Mask[J]) != J % I)
360+ return false ;
361+ SubVectorSize = I;
362+ return true ;
388363 }
389- return Cost;
364+ // Mask is the identity <0, 1, 2, ...>, which is not a concatenation.
365+ return false ;
390366}
391367
392368static VectorType *getVRGatherIndexType (MVT DataVT, const RISCVSubtarget &ST,
@@ -440,10 +416,29 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
440416 LT.second , CostKind);
441417 }
442418 }
443- if (InstructionCost Cost =
444- isMultipleInsertSubvector (Tp, Mask, CostKind);
445- Cost.isValid ())
419+ int SubVectorSize;
420+ if (LT.second .getScalarSizeInBits () != 1 &&
421+ isRepeatedConcatMaskImpl (Mask, SubVectorSize)) {
422+ InstructionCost Cost = 0 ;
423+ unsigned NumSlides = Log2_32 (Mask.size () / SubVectorSize);
424+ // The cost of extraction from a subvector is 0 if the index is 0.
425+ for (unsigned I = 0 ; I != NumSlides; ++I) {
426+ unsigned InsertIndex = SubVectorSize * (1 << I);
427+ FixedVectorType *SubTp =
428+ FixedVectorType::get (Tp->getElementType (), InsertIndex);
429+ FixedVectorType *DestTp =
430+ FixedVectorType::getDoubleElementsVectorType (SubTp);
431+ std::pair<InstructionCost, MVT> DestLT =
432+ getTypeLegalizationCost (DestTp);
433+ // Add the cost of whole vector register move because the
434+ // destination vector register group for vslideup cannot overlap the
435+ // source.
436+ Cost += DestLT.first * TLI->getLMULCost (DestLT.second );
437+ Cost += getShuffleCost (TTI::SK_InsertSubvector, DestTp, {},
438+ CostKind, InsertIndex, SubTp);
439+ }
446440 return Cost;
441+ }
447442 }
448443 // vrgather + cost of generating the mask constant.
449444 // We model this for an unknown mask with a single vrgather.
0 commit comments