@@ -525,6 +525,121 @@ SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
525525 }
526526}
527527
528+ // / Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
529+ // / instruction.
530+ // The funciton matches elements from one of the input vector shuffled to the
531+ // left or right with zeroable elements 'shifted in'. It handles both the
532+ // strictly bit-wise element shifts and the byte shfit across an entire 128-bit
533+ // lane.
534+ // Mostly copied from X86.
535+ static int matchShuffleAsShift (MVT &ShiftVT, unsigned &Opcode,
536+ unsigned ScalarSizeInBits, ArrayRef<int > Mask,
537+ int MaskOffset, const APInt &Zeroable) {
538+ int Size = Mask.size ();
539+ unsigned SizeInBits = Size * ScalarSizeInBits;
540+
541+ auto CheckZeros = [&](int Shift, int Scale, bool Left) {
542+ for (int i = 0 ; i < Size; i += Scale)
543+ for (int j = 0 ; j < Shift; ++j)
544+ if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
545+ return false ;
546+
547+ return true ;
548+ };
549+
550+ auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
551+ int Step = 1 ) {
552+ for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
553+ if (!(Mask[i] == -1 || Mask[i] == Low))
554+ return false ;
555+ return true ;
556+ };
557+
558+ auto MatchShift = [&](int Shift, int Scale, bool Left) {
559+ for (int i = 0 ; i != Size; i += Scale) {
560+ unsigned Pos = Left ? i + Shift : i;
561+ unsigned Low = Left ? i : i + Shift;
562+ unsigned Len = Scale - Shift;
563+ if (!isSequentialOrUndefInRange (Pos, Len, Low + MaskOffset))
564+ return -1 ;
565+ }
566+
567+ int ShiftEltBits = ScalarSizeInBits * Scale;
568+ bool ByteShift = ShiftEltBits > 64 ;
569+ Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
570+ : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
571+ int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1 );
572+
573+ // Normalize the scale for byte shifts to still produce an i64 element
574+ // type.
575+ Scale = ByteShift ? Scale / 2 : Scale;
576+
577+ // We need to round trip through the appropriate type for the shift.
578+ MVT ShiftSVT = MVT::getIntegerVT (ScalarSizeInBits * Scale);
579+ ShiftVT = ByteShift ? MVT::getVectorVT (MVT::i8 , SizeInBits / 8 )
580+ : MVT::getVectorVT (ShiftSVT, Size / Scale);
581+ return (int )ShiftAmt;
582+ };
583+
584+ unsigned MaxWidth = 128 ;
585+ for (int Scale = 2 ; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2 )
586+ for (int Shift = 1 ; Shift != Scale; ++Shift)
587+ for (bool Left : {true , false })
588+ if (CheckZeros (Shift, Scale, Left)) {
589+ int ShiftAmt = MatchShift (Shift, Scale, Left);
590+ if (0 < ShiftAmt)
591+ return ShiftAmt;
592+ }
593+
594+ // no match
595+ return -1 ;
596+ }
597+
598+ // / Lower VECTOR_SHUFFLE as shift (if possible).
599+ // /
600+ // / For example:
601+ // / %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
602+ // / <4 x i32> <i32 4, i32 0, i32 1, i32 2>
603+ // / is lowered to:
604+ // / (VBSLL_V $v0, $v0, 4)
605+ // /
606+ // / %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
607+ // / <4 x i32> <i32 4, i32 0, i32 4, i32 2>
608+ // / is lowered to:
609+ // / (VSLLI_D $v0, $v0, 32)
610+ static SDValue lowerVECTOR_SHUFFLEAsShift (const SDLoc &DL, ArrayRef<int > Mask,
611+ MVT VT, SDValue V1, SDValue V2,
612+ SelectionDAG &DAG,
613+ const APInt &Zeroable) {
614+ int Size = Mask.size ();
615+ assert (Size == (int )VT.getVectorNumElements () && " Unexpected mask size" );
616+
617+ MVT ShiftVT;
618+ SDValue V = V1;
619+ unsigned Opcode;
620+
621+ // Try to match shuffle against V1 shift.
622+ int ShiftAmt = matchShuffleAsShift (ShiftVT, Opcode, VT.getScalarSizeInBits (),
623+ Mask, 0 , Zeroable);
624+
625+ // If V1 failed, try to match shuffle against V2 shift.
626+ if (ShiftAmt < 0 ) {
627+ ShiftAmt = matchShuffleAsShift (ShiftVT, Opcode, VT.getScalarSizeInBits (),
628+ Mask, Size, Zeroable);
629+ V = V2;
630+ }
631+
632+ if (ShiftAmt < 0 )
633+ return SDValue ();
634+
635+ assert (DAG.getTargetLoweringInfo ().isTypeLegal (ShiftVT) &&
636+ " Illegal integer vector type" );
637+ V = DAG.getBitcast (ShiftVT, V);
638+ V = DAG.getNode (Opcode, DL, ShiftVT, V,
639+ DAG.getConstant (ShiftAmt, DL, MVT::i64 ));
640+ return DAG.getBitcast (VT, V);
641+ }
642+
528643// / Determine whether a range fits a regular pattern of values.
529644// / This function accounts for the possibility of jumping over the End iterator.
530645template <typename ValType>
@@ -593,14 +708,12 @@ static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
593708static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend (const SDLoc &DL,
594709 ArrayRef<int > Mask, MVT VT,
595710 SDValue V1, SDValue V2,
596- SelectionDAG &DAG) {
711+ SelectionDAG &DAG,
712+ const APInt &Zeroable) {
597713 int Bits = VT.getSizeInBits ();
598714 int EltBits = VT.getScalarSizeInBits ();
599715 int NumElements = VT.getVectorNumElements ();
600716
601- APInt KnownUndef, KnownZero;
602- computeZeroableShuffleElements (Mask, V1, V2, KnownUndef, KnownZero);
603- APInt Zeroable = KnownUndef | KnownZero;
604717 if (Zeroable.isAllOnes ())
605718 return DAG.getConstant (0 , DL, VT);
606719
@@ -1062,6 +1175,10 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
10621175 " Unexpected mask size for shuffle!" );
10631176 assert (Mask.size () % 2 == 0 && " Expected even mask size." );
10641177
1178+ APInt KnownUndef, KnownZero;
1179+ computeZeroableShuffleElements (Mask, V1, V2, KnownUndef, KnownZero);
1180+ APInt Zeroable = KnownUndef | KnownZero;
1181+
10651182 SDValue Result;
10661183 // TODO: Add more comparison patterns.
10671184 if (V2.isUndef ()) {
@@ -1089,12 +1206,14 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
10891206 return Result;
10901207 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD (DL, Mask, VT, V1, V2, DAG)))
10911208 return Result;
1209+ if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend (DL, Mask, VT, V1, V2, DAG,
1210+ Zeroable)))
1211+ return Result;
10921212 if ((Result =
1093- lowerVECTOR_SHUFFLEAsZeroOrAnyExtend (DL, Mask, VT, V1, V2, DAG)))
1213+ lowerVECTOR_SHUFFLEAsShift (DL, Mask, VT, V1, V2, DAG, Zeroable )))
10941214 return Result;
10951215 if ((Result = lowerVECTOR_SHUFFLE_VSHUF (DL, Mask, VT, V1, V2, DAG)))
10961216 return Result;
1097-
10981217 return SDValue ();
10991218}
11001219
@@ -1495,6 +1614,10 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
14951614 SmallVector<int > NewMask (Mask);
14961615 canonicalizeShuffleVectorByLane (DL, NewMask, VT, V1, V2, DAG);
14971616
1617+ APInt KnownUndef, KnownZero;
1618+ computeZeroableShuffleElements (NewMask, V1, V2, KnownUndef, KnownZero);
1619+ APInt Zeroable = KnownUndef | KnownZero;
1620+
14981621 SDValue Result;
14991622 // TODO: Add more comparison patterns.
15001623 if (V2.isUndef ()) {
@@ -1522,6 +1645,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
15221645 return Result;
15231646 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD (DL, NewMask, VT, V1, V2, DAG)))
15241647 return Result;
1648+ if ((Result =
1649+ lowerVECTOR_SHUFFLEAsShift (DL, NewMask, VT, V1, V2, DAG, Zeroable)))
1650+ return Result;
15251651 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF (DL, NewMask, VT, V1, V2, DAG)))
15261652 return Result;
15271653
@@ -5041,6 +5167,10 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
50415167 NODE_NAME_CASE (VANY_NONZERO)
50425168 NODE_NAME_CASE (FRECIPE)
50435169 NODE_NAME_CASE (FRSQRTE)
5170+ NODE_NAME_CASE (VSLLI)
5171+ NODE_NAME_CASE (VSRLI)
5172+ NODE_NAME_CASE (VBSLL)
5173+ NODE_NAME_CASE (VBSRL)
50445174 }
50455175#undef NODE_NAME_CASE
50465176 return nullptr ;
0 commit comments