@@ -597,6 +597,31 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
597597
598598 SplatActions.clampScalar (1 , sXLen , sXLen );
599599
600+ LegalityPredicate ExtractSubvecBitcastPred = [=](const LegalityQuery &Query) {
601+ LLT DstTy = Query.Types [0 ];
602+ LLT SrcTy = Query.Types [1 ];
603+ return DstTy.getElementType () == LLT::scalar (1 ) &&
604+ DstTy.getElementCount ().getKnownMinValue () >= 8 &&
605+ SrcTy.getElementCount ().getKnownMinValue () >= 8 ;
606+ };
607+ getActionDefinitionsBuilder (G_EXTRACT_SUBVECTOR)
608+ // We don't have the ability to slide mask vectors down indexed by their
609+ // i1 elements; the smallest we can do is i8. Often we are able to bitcast
610+ // to equivalent i8 vectors.
611+ .bitcastIf (
612+ all (typeIsLegalBoolVec (0 , BoolVecTys, ST),
613+ typeIsLegalBoolVec (1 , BoolVecTys, ST), ExtractSubvecBitcastPred),
614+ [=](const LegalityQuery &Query) {
615+ LLT CastTy = LLT::vector (
616+ Query.Types [0 ].getElementCount ().divideCoefficientBy (8 ), 8 );
617+ return std::pair (0 , CastTy);
618+ })
619+ .customIf (LegalityPredicates::any (
620+ all (typeIsLegalBoolVec (0 , BoolVecTys, ST),
621+ typeIsLegalBoolVec (1 , BoolVecTys, ST)),
622+ all (typeIsLegalIntOrFPVec (0 , IntOrFPVecTys, ST),
623+ typeIsLegalIntOrFPVec (1 , IntOrFPVecTys, ST))));
624+
600625 getLegacyLegalizerInfo ().computeTables ();
601626}
602627
@@ -931,6 +956,105 @@ bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
931956 return true ;
932957}
933958
959+ static LLT getLMUL1Ty (LLT VecTy) {
960+ assert (VecTy.getElementType ().getSizeInBits () <= 64 &&
961+ " Unexpected vector LLT" );
962+ return LLT::scalable_vector (RISCV::RVVBitsPerBlock /
963+ VecTy.getElementType ().getSizeInBits (),
964+ VecTy.getElementType ());
965+ }
966+
967+ bool RISCVLegalizerInfo::legalizeExtractSubvector (MachineInstr &MI,
968+ LegalizerHelper &Helper,
969+ MachineIRBuilder &MIB) const {
970+ GExtractSubvector &ES = cast<GExtractSubvector>(MI);
971+
972+ MachineRegisterInfo &MRI = *MIB.getMRI ();
973+
974+ Register Dst = ES.getReg (0 );
975+ Register Src = ES.getSrcVec ();
976+ uint64_t Idx = ES.getIndexImm ();
977+
978+ // With an index of 0 this is a cast-like subvector, which can be performed
979+ // with subregister operations.
980+ if (Idx == 0 )
981+ return true ;
982+
983+ LLT LitTy = MRI.getType (Dst);
984+ LLT BigTy = MRI.getType (Src);
985+
986+ if (LitTy.getElementType () == LLT::scalar (1 )) {
987+ // We can't slide this mask vector up indexed by its i1 elements.
988+ // This poses a problem when we wish to insert a scalable vector which
989+ // can't be re-expressed as a larger type. Just choose the slow path and
990+ // extend to a larger type, then truncate back down.
991+ LLT ExtBigTy = BigTy.changeElementType (LLT::scalar (8 ));
992+ LLT ExtLitTy = LitTy.changeElementType (LLT::scalar (8 ));
993+ auto BigZExt = MIB.buildZExt (ExtBigTy, Src);
994+ auto ExtractZExt = MIB.buildExtractSubvector (ExtLitTy, BigZExt, Idx);
995+ auto SplatZero = MIB.buildSplatVector (
996+ ExtLitTy, MIB.buildConstant (ExtLitTy.getElementType (), 0 ));
997+ MIB.buildICmp (CmpInst::Predicate::ICMP_NE, Dst, ExtractZExt, SplatZero);
998+ MI.eraseFromParent ();
999+ return true ;
1000+ }
1001+
1002+ // extract_subvector scales the index by vscale if the subvector is scalable,
1003+ // and decomposeSubvectorInsertExtractToSubRegs takes this into account.
1004+ const RISCVRegisterInfo *TRI = STI.getRegisterInfo ();
1005+ MVT LitTyMVT = getMVTForLLT (LitTy);
1006+ auto Decompose =
1007+ RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs (
1008+ getMVTForLLT (BigTy), LitTyMVT, Idx, TRI);
1009+ unsigned RemIdx = Decompose.second ;
1010+
1011+ // If the Idx has been completely eliminated then this is a subvector extract
1012+ // which naturally aligns to a vector register. These can easily be handled
1013+ // using subregister manipulation.
1014+ if (RemIdx == 0 )
1015+ return true ;
1016+
1017+ // Else LitTy is M1 or smaller and may need to be slid down: if LitTy
1018+ // was > M1 then the index would need to be a multiple of VLMAX, and so would
1019+ // divide exactly.
1020+ assert (
1021+ RISCVVType::decodeVLMUL (RISCVTargetLowering::getLMUL (LitTyMVT)).second ||
1022+ RISCVTargetLowering::getLMUL (LitTyMVT) == RISCVII::VLMUL::LMUL_1);
1023+
1024+ // If the vector type is an LMUL-group type, extract a subvector equal to the
1025+ // nearest full vector register type.
1026+ LLT InterLitTy = BigTy;
1027+ Register Vec = Src;
1028+ if (TypeSize::isKnownGT (BigTy.getSizeInBits (),
1029+ getLMUL1Ty (BigTy).getSizeInBits ())) {
1030+ // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and
1031+ // we should have successfully decomposed the extract into a subregister.
1032+ assert (Decompose.first != RISCV::NoSubRegister);
1033+ InterLitTy = getLMUL1Ty (BigTy);
1034+ // SDAG builds a TargetExtractSubreg. We cannot create a a Copy with SubReg
1035+ // specified on the source Register (the equivalent) since generic virtual
1036+ // register does not allow subregister index.
1037+ Vec = MIB.buildExtractSubvector (InterLitTy, Src, Idx - RemIdx).getReg (0 );
1038+ }
1039+
1040+ // Slide this vector register down by the desired number of elements in order
1041+ // to place the desired subvector starting at element 0.
1042+ const LLT XLenTy (STI.getXLenVT ());
1043+ auto SlidedownAmt = MIB.buildVScale (XLenTy, RemIdx);
1044+ auto [Mask, VL] = buildDefaultVLOps (LitTy, MIB, MRI);
1045+ uint64_t Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
1046+ auto Slidedown = MIB.buildInstr (
1047+ RISCV::G_VSLIDEDOWN_VL, {InterLitTy},
1048+ {MIB.buildUndef (InterLitTy), Vec, SlidedownAmt, Mask, VL, Policy});
1049+
1050+ // Now the vector is in the right position, extract our final subvector. This
1051+ // should resolve to a COPY.
1052+ MIB.buildExtractSubvector (Dst, Slidedown, 0 );
1053+
1054+ MI.eraseFromParent ();
1055+ return true ;
1056+ }
1057+
9341058bool RISCVLegalizerInfo::legalizeCustom (
9351059 LegalizerHelper &Helper, MachineInstr &MI,
9361060 LostDebugLocObserver &LocObserver) const {
@@ -1001,6 +1125,8 @@ bool RISCVLegalizerInfo::legalizeCustom(
10011125 return legalizeExt (MI, MIRBuilder);
10021126 case TargetOpcode::G_SPLAT_VECTOR:
10031127 return legalizeSplatVector (MI, MIRBuilder);
1128+ case TargetOpcode::G_EXTRACT_SUBVECTOR:
1129+ return legalizeExtractSubvector (MI, Helper, MIRBuilder);
10041130 case TargetOpcode::G_LOAD:
10051131 case TargetOpcode::G_STORE:
10061132 return legalizeLoadStore (MI, Helper, MIRBuilder);
0 commit comments