@@ -393,8 +393,9 @@ static InstructionCost
393393costShuffleViaVRegSplitting (RISCVTTIImpl &TTI, MVT LegalVT,
394394 std::optional<unsigned > VLen, VectorType *Tp,
395395 ArrayRef<int > Mask, TTI::TargetCostKind CostKind) {
396+ assert (LegalVT.isFixedLengthVector ());
396397 InstructionCost NumOfDests = InstructionCost::getInvalid ();
397- if (VLen && LegalVT. isFixedLengthVector () && !Mask.empty ()) {
398+ if (VLen && !Mask.empty ()) {
398399 MVT ElemVT = LegalVT.getVectorElementType ();
399400 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits ();
400401 LegalVT = TTI.getTypeLegalizationCost (
@@ -404,7 +405,6 @@ costShuffleViaVRegSplitting(RISCVTTIImpl &TTI, MVT LegalVT,
404405 NumOfDests = divideCeil (Mask.size (), LegalVT.getVectorNumElements ());
405406 }
406407 if (!NumOfDests.isValid () || NumOfDests <= 1 ||
407- !LegalVT.isFixedLengthVector () ||
408408 LegalVT.getVectorElementType ().getSizeInBits () !=
409409 Tp->getElementType ()->getPrimitiveSizeInBits () ||
410410 LegalVT.getVectorNumElements () >= Tp->getElementCount ().getFixedValue ())
@@ -487,7 +487,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
487487 // First, handle cases where having a fixed length vector enables us to
488488 // give a more accurate cost than falling back to generic scalable codegen.
489489 // TODO: Each of these cases hints at a modeling gap around scalable vectors.
490- if (ST->hasVInstructions () && isa<FixedVectorType>(Tp)) {
490+ if (ST->hasVInstructions () && isa<FixedVectorType>(Tp) &&
491+ LT.second .isFixedLengthVector ()) {
491492 InstructionCost VRegSplittingCost = costShuffleViaVRegSplitting (
492493 *this , LT.second , ST->getRealVLen (), Tp, Mask, CostKind);
493494 if (VRegSplittingCost.isValid ())
@@ -496,7 +497,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
496497 default :
497498 break ;
498499 case TTI::SK_PermuteSingleSrc: {
499- if (Mask.size () >= 2 && LT. second . isFixedLengthVector () ) {
500+ if (Mask.size () >= 2 ) {
500501 MVT EltTp = LT.second .getVectorElementType ();
501502 // If the size of the element is < ELEN then shuffles of interleaves and
502503 // deinterleaves of 2 vectors can be lowered into the following
@@ -545,24 +546,23 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
545546 }
546547 // vrgather + cost of generating the mask constant.
547548 // We model this for an unknown mask with a single vrgather.
548- if (LT.second . isFixedLengthVector () && LT.first == 1 &&
549- ( LT.second .getScalarSizeInBits () != 8 ||
550- LT. second . getVectorNumElements () <= 256 )) {
551- VectorType *IdxTy = getVRGatherIndexType (LT.second , *ST, Tp->getContext ());
549+ if (LT.first == 1 && ( LT.second . getScalarSizeInBits () != 8 ||
550+ LT.second .getVectorNumElements () <= 256 )) {
551+ VectorType *IdxTy =
552+ getVRGatherIndexType (LT.second , *ST, Tp->getContext ());
552553 InstructionCost IndexCost = getConstantPoolLoadCost (IdxTy, CostKind);
553554 return IndexCost +
554555 getRISCVInstructionCost (RISCV::VRGATHER_VV, LT.second , CostKind);
555556 }
556- [[fallthrough]] ;
557+ break ;
557558 }
558559 case TTI::SK_Transpose:
559560 case TTI::SK_PermuteTwoSrc: {
560561 // 2 x (vrgather + cost of generating the mask constant) + cost of mask
561562 // register for the second vrgather. We model this for an unknown
562563 // (shuffle) mask.
563- if (LT.second .isFixedLengthVector () && LT.first == 1 &&
564- (LT.second .getScalarSizeInBits () != 8 ||
565- LT.second .getVectorNumElements () <= 256 )) {
564+ if (LT.first == 1 && (LT.second .getScalarSizeInBits () != 8 ||
565+ LT.second .getVectorNumElements () <= 256 )) {
566566 auto &C = Tp->getContext ();
567567 auto EC = Tp->getElementCount ();
568568 VectorType *IdxTy = getVRGatherIndexType (LT.second , *ST, C);
@@ -574,56 +574,65 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
574574 LT.second , CostKind) +
575575 MaskCost;
576576 }
577- [[fallthrough]];
578- }
579- case TTI::SK_Select: {
580- // We are going to permute multiple sources and the result will be in
581- // multiple destinations. Providing an accurate cost only for splits where
582- // the element type remains the same.
583- if (!Mask.empty () && LT.first .isValid () && LT.first != 1 &&
584- LT.second .isFixedLengthVector () &&
585- LT.second .getVectorElementType ().getSizeInBits () ==
586- Tp->getElementType ()->getPrimitiveSizeInBits () &&
587- LT.second .getVectorNumElements () <
588- cast<FixedVectorType>(Tp)->getNumElements () &&
589- divideCeil (Mask.size (),
590- cast<FixedVectorType>(Tp)->getNumElements ()) ==
591- static_cast <unsigned >(*LT.first .getValue ())) {
592- unsigned NumRegs = *LT.first .getValue ();
593- unsigned VF = cast<FixedVectorType>(Tp)->getNumElements ();
594- unsigned SubVF = PowerOf2Ceil (VF / NumRegs);
595- auto *SubVecTy = FixedVectorType::get (Tp->getElementType (), SubVF);
596-
597- InstructionCost Cost = 0 ;
598- for (unsigned I = 0 , NumSrcRegs = divideCeil (Mask.size (), SubVF);
599- I < NumSrcRegs; ++I) {
600- bool IsSingleVector = true ;
601- SmallVector<int > SubMask (SubVF, PoisonMaskElem);
602- transform (
603- Mask.slice (I * SubVF,
604- I == NumSrcRegs - 1 ? Mask.size () % SubVF : SubVF),
605- SubMask.begin (), [&](int I) -> int {
606- if (I == PoisonMaskElem)
607- return PoisonMaskElem;
608- bool SingleSubVector = I / VF == 0 ;
609- IsSingleVector &= SingleSubVector;
610- return (SingleSubVector ? 0 : 1 ) * SubVF + (I % VF) % SubVF;
611- });
612- if (all_of (enumerate(SubMask), [](auto &&P) {
613- return P.value () == PoisonMaskElem ||
614- static_cast <unsigned >(P.value ()) == P.index ();
615- }))
616- continue ;
617- Cost += getShuffleCost (IsSingleVector ? TTI::SK_PermuteSingleSrc
618- : TTI::SK_PermuteTwoSrc,
619- SubVecTy, SubMask, CostKind, 0 , nullptr );
620- }
621- return Cost;
622- }
623577 break ;
624578 }
625579 }
626- };
580+
581+ auto shouldSplit = [](TTI::ShuffleKind Kind) {
582+ switch (Kind) {
583+ default :
584+ return false ;
585+ case TTI::SK_PermuteSingleSrc:
586+ case TTI::SK_Transpose:
587+ case TTI::SK_PermuteTwoSrc:
588+ case TTI::SK_Select:
589+ return true ;
590+ }
591+ };
592+ // We are going to permute multiple sources and the result will be in
593+ // multiple destinations. Providing an accurate cost only for splits where
594+ // the element type remains the same.
595+ if (!Mask.empty () && LT.first .isValid () && LT.first != 1 &&
596+ shouldSplit (Kind) &&
597+ LT.second .getVectorElementType ().getSizeInBits () ==
598+ Tp->getElementType ()->getPrimitiveSizeInBits () &&
599+ LT.second .getVectorNumElements () <
600+ cast<FixedVectorType>(Tp)->getNumElements () &&
601+ divideCeil (Mask.size (),
602+ cast<FixedVectorType>(Tp)->getNumElements ()) ==
603+ static_cast <unsigned >(*LT.first .getValue ())) {
604+ unsigned NumRegs = *LT.first .getValue ();
605+ unsigned VF = cast<FixedVectorType>(Tp)->getNumElements ();
606+ unsigned SubVF = PowerOf2Ceil (VF / NumRegs);
607+ auto *SubVecTy = FixedVectorType::get (Tp->getElementType (), SubVF);
608+
609+ InstructionCost Cost = 0 ;
610+ for (unsigned I = 0 , NumSrcRegs = divideCeil (Mask.size (), SubVF);
611+ I < NumSrcRegs; ++I) {
612+ bool IsSingleVector = true ;
613+ SmallVector<int > SubMask (SubVF, PoisonMaskElem);
614+ transform (
615+ Mask.slice (I * SubVF,
616+ I == NumSrcRegs - 1 ? Mask.size () % SubVF : SubVF),
617+ SubMask.begin (), [&](int I) -> int {
618+ if (I == PoisonMaskElem)
619+ return PoisonMaskElem;
620+ bool SingleSubVector = I / VF == 0 ;
621+ IsSingleVector &= SingleSubVector;
622+ return (SingleSubVector ? 0 : 1 ) * SubVF + (I % VF) % SubVF;
623+ });
624+ if (all_of (enumerate(SubMask), [](auto &&P) {
625+ return P.value () == PoisonMaskElem ||
626+ static_cast <unsigned >(P.value ()) == P.index ();
627+ }))
628+ continue ;
629+ Cost += getShuffleCost (IsSingleVector ? TTI::SK_PermuteSingleSrc
630+ : TTI::SK_PermuteTwoSrc,
631+ SubVecTy, SubMask, CostKind, 0 , nullptr );
632+ }
633+ return Cost;
634+ }
635+ }
627636
628637 // Handle scalable vectors (and fixed vectors legalized to scalable vectors).
629638 switch (Kind) {
0 commit comments