@@ -542,6 +542,132 @@ fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
542542 return true ;
543543}
544544
545+ // / Compute whether each element of a shuffle is zeroable.
546+ // /
547+ // / A "zeroable" vector shuffle element is one which can be lowered to zero.
548+ static void computeZeroableShuffleElements (ArrayRef<int > Mask, SDValue V1,
549+ SDValue V2, APInt &KnownUndef,
550+ APInt &KnownZero) {
551+ int Size = Mask.size ();
552+ KnownUndef = KnownZero = APInt::getZero (Size);
553+
554+ V1 = peekThroughBitcasts (V1);
555+ V2 = peekThroughBitcasts (V2);
556+
557+ bool V1IsZero = ISD::isBuildVectorAllZeros (V1.getNode ());
558+ bool V2IsZero = ISD::isBuildVectorAllZeros (V2.getNode ());
559+
560+ int VectorSizeInBits = V1.getValueSizeInBits ();
561+ int ScalarSizeInBits = VectorSizeInBits / Size;
562+ assert (!(VectorSizeInBits % ScalarSizeInBits) && " Illegal shuffle mask size" );
563+
564+ for (int i = 0 ; i < Size; ++i) {
565+ int M = Mask[i];
566+ if (M < 0 ) {
567+ KnownUndef.setBit (i);
568+ continue ;
569+ }
570+ if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
571+ KnownZero.setBit (i);
572+ continue ;
573+ }
574+ }
575+ }
576+
577+ // / Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
578+ // /
579+ // / For example:
580+ // / %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
581+ // / <4 x i32> <i32 0, i32 4, i32 1, i32 4>
582+ // / %3 = bitcast <4 x i32> %2 to <2 x i64>
583+ // / is lowered to:
584+ // / (VREPLI $v1, 0)
585+ // / (VILVL $v0, $v1, $v0)
586+ static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend (const SDLoc &DL,
587+ ArrayRef<int > Mask, MVT VT,
588+ SDValue V1, SDValue V2,
589+ SelectionDAG &DAG) {
590+ int Bits = VT.getSizeInBits ();
591+ int EltBits = VT.getScalarSizeInBits ();
592+ int NumElements = VT.getVectorNumElements ();
593+
594+ APInt KnownUndef, KnownZero;
595+ computeZeroableShuffleElements (Mask, V1, V2, KnownUndef, KnownZero);
596+ APInt Zeroable = KnownUndef | KnownZero;
597+ if (Zeroable.isAllOnes ())
598+ return DAG.getConstant (0 , DL, VT);
599+
600+ // Define a helper function to check a particular ext-scale and lower to it if
601+ // valid.
602+ auto Lower = [&](int Scale) -> SDValue {
603+ SDValue InputV;
604+ bool AnyExt = true ;
605+ int Offset = 0 ;
606+ for (int i = 0 ; i < NumElements; i++) {
607+ int M = Mask[i];
608+ if (M < 0 )
609+ continue ;
610+ if (i % Scale != 0 ) {
611+ // Each of the extended elements need to be zeroable.
612+ if (!Zeroable[i])
613+ return SDValue ();
614+
615+ AnyExt = false ;
616+ continue ;
617+ }
618+
619+ // Each of the base elements needs to be consecutive indices into the
620+ // same input vector.
621+ SDValue V = M < NumElements ? V1 : V2;
622+ M = M % NumElements;
623+ if (!InputV) {
624+ InputV = V;
625+ Offset = M - (i / Scale);
626+
627+ // These offset can't be handled
628+ if (Offset % (NumElements / Scale))
629+ return SDValue ();
630+ } else if (InputV != V)
631+ return SDValue ();
632+
633+ if (M != (Offset + (i / Scale)))
634+ return SDValue (); // Non-consecutive strided elements.
635+ }
636+
637+ // If we fail to find an input, we have a zero-shuffle which should always
638+ // have already been handled.
639+ if (!InputV)
640+ return SDValue ();
641+
642+ do {
643+ unsigned VilVLoHi = LoongArchISD::VILVL;
644+ if (Offset >= (NumElements / 2 )) {
645+ VilVLoHi = LoongArchISD::VILVH;
646+ Offset -= (NumElements / 2 );
647+ }
648+
649+ MVT InputVT = MVT::getVectorVT (MVT::getIntegerVT (EltBits), NumElements);
650+ SDValue Ext =
651+ AnyExt ? DAG.getFreeze (InputV) : DAG.getConstant (0 , DL, InputVT);
652+ InputV = DAG.getBitcast (InputVT, InputV);
653+ InputV = DAG.getNode (VilVLoHi, DL, InputVT, Ext, InputV);
654+ Scale /= 2 ;
655+ EltBits *= 2 ;
656+ NumElements /= 2 ;
657+ } while (Scale > 1 );
658+ return DAG.getBitcast (VT, InputV);
659+ };
660+
661+ // Each iteration, try extending the elements half as much, but into twice as
662+ // many elements.
663+ for (int NumExtElements = Bits / 64 ; NumExtElements < NumElements;
664+ NumExtElements *= 2 ) {
665+ if (SDValue V = Lower (NumElements / NumExtElements))
666+ return V;
667+ }
668+ return SDValue ();
669+ }
670+
545671// / Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
546672// /
547673// / VREPLVEI performs vector broadcast based on an element specified by an
@@ -956,6 +1082,9 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
9561082 return Result;
9571083 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD (DL, Mask, VT, V1, V2, DAG)))
9581084 return Result;
1085+ if ((Result =
1086+ lowerVECTOR_SHUFFLEAsZeroOrAnyExtend (DL, Mask, VT, V1, V2, DAG)))
1087+ return Result;
9591088 if ((Result = lowerVECTOR_SHUFFLE_VSHUF (DL, Mask, VT, V1, V2, DAG)))
9601089 return Result;
9611090
0 commit comments