@@ -744,3 +744,200 @@ define <8 x i8> @shuffle_v64i8_v8i8(<64 x i8> %wide.vec) {
744744 %s = shufflevector <64 x i8 > %wide.vec , <64 x i8 > poison, <8 x i32 > <i32 0 , i32 8 , i32 16 , i32 24 , i32 32 , i32 40 , i32 48 , i32 56 >
745745 ret <8 x i8 > %s
746746}
747+
; Single-source "compress" shuffle on <8 x i8>: result lanes 0-4 take source
; elements 0, 2, 4, 5, 7 (strictly increasing), and lanes 5-7 are undef.
; The expected lowering loads an index vector from the constant pool and
; performs a vrgather.vv at e8/mf2.
748+ define <8 x i8 > @shuffle_compress_singlesrc_e8 (<8 x i8 > %v ) {
749+ ; CHECK-LABEL: shuffle_compress_singlesrc_e8:
750+ ; CHECK: # %bb.0:
751+ ; CHECK-NEXT: lui a0, %hi(.LCPI49_0)
752+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI49_0)
753+ ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
754+ ; CHECK-NEXT: vle8.v v10, (a0)
755+ ; CHECK-NEXT: vrgather.vv v9, v8, v10
756+ ; CHECK-NEXT: vmv1r.v v8, v9
757+ ; CHECK-NEXT: ret
758+ %out = shufflevector <8 x i8 > %v , <8 x i8 > poison, <8 x i32 > <i32 0 , i32 2 , i32 4 , i32 5 , i32 7 , i32 undef , i32 undef , i32 undef >
759+ ret <8 x i8 > %out
760+ }
761+
; Single-source "compress" shuffle on <8 x i16>: result lanes 0-4 take source
; elements 0, 2, 4, 5, 7, and lanes 5-7 are undef.
; The expected lowering loads 16-bit indices from the constant pool and
; performs a vrgather.vv at e16/m1.
762+ define <8 x i16 > @shuffle_compress_singlesrc_e16 (<8 x i16 > %v ) {
763+ ; CHECK-LABEL: shuffle_compress_singlesrc_e16:
764+ ; CHECK: # %bb.0:
765+ ; CHECK-NEXT: lui a0, %hi(.LCPI50_0)
766+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI50_0)
767+ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
768+ ; CHECK-NEXT: vle16.v v10, (a0)
769+ ; CHECK-NEXT: vrgather.vv v9, v8, v10
770+ ; CHECK-NEXT: vmv.v.v v8, v9
771+ ; CHECK-NEXT: ret
772+ %out = shufflevector <8 x i16 > %v , <8 x i16 > poison, <8 x i32 > <i32 0 , i32 2 , i32 4 , i32 5 , i32 7 , i32 undef , i32 undef , i32 undef >
773+ ret <8 x i16 > %out
774+ }
775+
; Single-source "compress" shuffle on <8 x i32>: result lanes 0-4 take source
; elements 0, 1, 4, 5, 6, and lanes 5-7 are undef.
; NOTE(review): this mask differs from the 0, 2, 4, 5, 7 pattern used by the
; e8/e16/e64 variants - confirm that is intentional.
; The expected lowering loads 16-bit indices from the constant pool and
; performs a vrgatherei16.vv at e32/m2.
776+ define <8 x i32 > @shuffle_compress_singlesrc_e32 (<8 x i32 > %v ) {
777+ ; CHECK-LABEL: shuffle_compress_singlesrc_e32:
778+ ; CHECK: # %bb.0:
779+ ; CHECK-NEXT: lui a0, %hi(.LCPI51_0)
780+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI51_0)
781+ ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
782+ ; CHECK-NEXT: vle16.v v12, (a0)
783+ ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
784+ ; CHECK-NEXT: vmv.v.v v8, v10
785+ ; CHECK-NEXT: ret
786+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 0 , i32 1 , i32 4 , i32 5 , i32 6 , i32 undef , i32 undef , i32 undef >
787+ ret <8 x i32 > %out
788+ }
789+
; Single-source "compress" shuffle on <8 x i64>: result lanes 0-4 take source
; elements 0, 2, 4, 5, 7, and lanes 5-7 are undef.
; The expected lowering loads 16-bit indices from the constant pool and
; performs a vrgatherei16.vv at e64/m4.
790+ define <8 x i64 > @shuffle_compress_singlesrc_e64 (<8 x i64 > %v ) {
791+ ; CHECK-LABEL: shuffle_compress_singlesrc_e64:
792+ ; CHECK: # %bb.0:
793+ ; CHECK-NEXT: lui a0, %hi(.LCPI52_0)
794+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI52_0)
795+ ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
796+ ; CHECK-NEXT: vle16.v v16, (a0)
797+ ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
798+ ; CHECK-NEXT: vmv.v.v v8, v12
799+ ; CHECK-NEXT: ret
800+ %out = shufflevector <8 x i64 > %v , <8 x i64 > poison, <8 x i32 > <i32 0 , i32 2 , i32 4 , i32 5 , i32 7 , i32 undef , i32 undef , i32 undef >
801+ ret <8 x i64 > %out
802+ }
803+
; Compress-style shuffle on <8 x i32> with an undef lane inside the packed
; prefix: the mask is 0, undef, 4, 5, 7 followed by three undef lanes.
; The expected lowering loads 16-bit indices from the constant pool and
; performs a vrgatherei16.vv at e32/m2.
804+ define <8 x i32 > @shuffle_compress_singlesrc_gaps_e32 (<8 x i32 > %v ) {
805+ ; CHECK-LABEL: shuffle_compress_singlesrc_gaps_e32:
806+ ; CHECK: # %bb.0:
807+ ; CHECK-NEXT: lui a0, %hi(.LCPI53_0)
808+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI53_0)
809+ ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
810+ ; CHECK-NEXT: vle16.v v12, (a0)
811+ ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
812+ ; CHECK-NEXT: vmv.v.v v8, v10
813+ ; CHECK-NEXT: ret
814+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 0 , i32 undef , i32 4 , i32 5 , i32 7 , i32 undef , i32 undef , i32 undef >
815+ ret <8 x i32 > %out
816+ }
817+
; Single-source "decompress" shuffle with stride 2 on <8 x i32>: source
; elements 0-3 land in the even result lanes; the odd lanes are undef.
; The expected lowering needs no index vector: it is a vwaddu.vv of the source
; with itself followed by a vwmaccu.vx with scalar -1, at e32/m1 with VL=4.
; NOTE(review): presumably this is the widening-interleave lowering with the
; undef lanes filled by the same source - confirm against the backend.
818+ define <8 x i32 > @shuffle_decompress2_singlesrc_e32 (<8 x i32 > %v ) {
819+ ; CHECK-LABEL: shuffle_decompress2_singlesrc_e32:
820+ ; CHECK: # %bb.0:
821+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
822+ ; CHECK-NEXT: vwaddu.vv v10, v8, v8
823+ ; CHECK-NEXT: li a0, -1
824+ ; CHECK-NEXT: vwmaccu.vx v10, a0, v8
825+ ; CHECK-NEXT: vmv2r.v v8, v10
826+ ; CHECK-NEXT: ret
827+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 0 , i32 undef , i32 1 , i32 undef , i32 2 , i32 undef , i32 3 , i32 undef >
828+ ret <8 x i32 > %out
829+ }
830+
; Single-source "decompress" shuffle with stride 3 on <8 x i32>: source
; elements 0, 1, 2 land in result lanes 0, 3, 6; all other lanes are undef.
; The expected output differs per target, so separate RV32 and RV64 check
; prefixes are used. On RV32 the 16-bit indices are loaded from the constant
; pool. On RV64 the index vector is instead materialized by splatting a scalar
; built with lui/slli at e64 (VL=2). Both then use vrgatherei16.vv at e32/m2.
831+ define <8 x i32 > @shuffle_decompress3_singlesrc_e32 (<8 x i32 > %v ) {
832+ ; RV32-LABEL: shuffle_decompress3_singlesrc_e32:
833+ ; RV32: # %bb.0:
834+ ; RV32-NEXT: lui a0, %hi(.LCPI55_0)
835+ ; RV32-NEXT: addi a0, a0, %lo(.LCPI55_0)
836+ ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
837+ ; RV32-NEXT: vle16.v v12, (a0)
838+ ; RV32-NEXT: vrgatherei16.vv v10, v8, v12
839+ ; RV32-NEXT: vmv.v.v v8, v10
840+ ; RV32-NEXT: ret
841+ ;
842+ ; RV64-LABEL: shuffle_decompress3_singlesrc_e32:
843+ ; RV64: # %bb.0:
844+ ; RV64-NEXT: lui a0, 32769
845+ ; RV64-NEXT: slli a0, a0, 21
846+ ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
847+ ; RV64-NEXT: vmv.v.x v12, a0
848+ ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
849+ ; RV64-NEXT: vrgatherei16.vv v10, v8, v12
850+ ; RV64-NEXT: vmv.v.v v8, v10
851+ ; RV64-NEXT: ret
852+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 0 , i32 undef , i32 undef , i32 1 , i32 undef , i32 undef , i32 2 , i32 undef >
853+ ret <8 x i32 > %out
854+ }
855+
; Single-source "decompress" shuffle with stride 4 on <8 x i32>: source
; elements 0 and 1 land in result lanes 0 and 4; all other lanes are undef.
; The currently expected lowering builds the index vector at e16 with vid.v
; and vsrl.vi by 2 (lane i gets index i >> 2), then uses vrgatherei16.vv at
; e32/m2.
856+ ; TODO: This should be a single vslideup.vi
857+ define <8 x i32 > @shuffle_decompress4_singlesrc_e32 (<8 x i32 > %v ) {
858+ ; CHECK-LABEL: shuffle_decompress4_singlesrc_e32:
859+ ; CHECK: # %bb.0:
860+ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
861+ ; CHECK-NEXT: vid.v v10
862+ ; CHECK-NEXT: vsrl.vi v12, v10, 2
863+ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
864+ ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
865+ ; CHECK-NEXT: vmv.v.v v8, v10
866+ ; CHECK-NEXT: ret
867+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 0 , i32 undef , i32 undef , i32 undef , i32 1 , i32 undef , i32 undef , i32 undef >
868+ ret <8 x i32 > %out
869+ }
870+
; Single-source "decompress" shuffle with stride 4 on <8 x i8>: source
; elements 0 and 1 land in result lanes 0 and 4; all other lanes are undef.
; The currently expected lowering builds the index vector with vid.v and
; vsrl.vi by 2 (lane i gets index i >> 2) and uses vrgather.vv, all at e8/mf2.
871+ ; TODO: This should be either a single vslideup.vi or two widening interleaves.
872+ define <8 x i8 > @shuffle_decompress4_singlesrc_e8 (<8 x i8 > %v ) {
873+ ; CHECK-LABEL: shuffle_decompress4_singlesrc_e8:
874+ ; CHECK: # %bb.0:
875+ ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
876+ ; CHECK-NEXT: vid.v v9
877+ ; CHECK-NEXT: vsrl.vi v10, v9, 2
878+ ; CHECK-NEXT: vrgather.vv v9, v8, v10
879+ ; CHECK-NEXT: vmv1r.v v8, v9
880+ ; CHECK-NEXT: ret
881+ %out = shufflevector <8 x i8 > %v , <8 x i8 > poison, <8 x i32 > <i32 0 , i32 undef , i32 undef , i32 undef , i32 1 , i32 undef , i32 undef , i32 undef >
882+ ret <8 x i8 > %out
883+ }
884+
; Single-source "decompress" shuffle with irregular spacing on <8 x i32>:
; source elements 0, 1, 3, 4 land in result lanes 0, 2, 4, 7; the remaining
; lanes are undef.
; The expected lowering loads 16-bit indices from the constant pool and
; performs a vrgatherei16.vv at e32/m2.
885+ define <8 x i32 > @shuffle_decompress_singlesrc_e32 (<8 x i32 > %v ) {
886+ ; CHECK-LABEL: shuffle_decompress_singlesrc_e32:
887+ ; CHECK: # %bb.0:
888+ ; CHECK-NEXT: lui a0, %hi(.LCPI58_0)
889+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI58_0)
890+ ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
891+ ; CHECK-NEXT: vle16.v v12, (a0)
892+ ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
893+ ; CHECK-NEXT: vmv.v.v v8, v10
894+ ; CHECK-NEXT: ret
895+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 0 , i32 undef , i32 1 , i32 undef , i32 3 , i32 undef , i32 undef , i32 4 >
896+ ret <8 x i32 > %out
897+ }
898+
; Single-source "repeat" shuffle on <8 x i32>: each of source elements 0-3 is
; duplicated into two adjacent result lanes (mask 0,0,1,1,2,2,3,3).
; The expected lowering needs no index vector: it is a vwaddu.vv of the source
; with itself followed by a vwmaccu.vx with scalar -1, at e32/m1 with VL=4.
899+ define <8 x i32 > @shuffle_repeat2_singlesrc_e32 (<8 x i32 > %v ) {
900+ ; CHECK-LABEL: shuffle_repeat2_singlesrc_e32:
901+ ; CHECK: # %bb.0:
902+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
903+ ; CHECK-NEXT: vwaddu.vv v10, v8, v8
904+ ; CHECK-NEXT: li a0, -1
905+ ; CHECK-NEXT: vwmaccu.vx v10, a0, v8
906+ ; CHECK-NEXT: vmv2r.v v8, v10
907+ ; CHECK-NEXT: ret
908+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 0 , i32 0 , i32 1 , i32 1 , i32 2 , i32 2 , i32 3 , i32 3 >
909+ ret <8 x i32 > %out
910+ }
911+
; Single-source "repeat" shuffle on <8 x i32> with a repeat count of 3:
; mask 0,0,0,1,1,1,2,2 (the last group is cut short by the vector length).
; The expected lowering builds the 16-bit index vector in registers: a splat
; of 1 is merged with 0 under mask 7 (lanes 0-2) and then with 2 under mask
; 192 (lanes 6-7); the result feeds a vrgatherei16.vv at e32/m2.
912+ define <8 x i32 > @shuffle_repeat3_singlesrc_e32 (<8 x i32 > %v ) {
913+ ; CHECK-LABEL: shuffle_repeat3_singlesrc_e32:
914+ ; CHECK: # %bb.0:
915+ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
916+ ; CHECK-NEXT: vmv.v.i v0, 7
917+ ; CHECK-NEXT: vmv.v.i v11, 1
918+ ; CHECK-NEXT: li a0, 192
919+ ; CHECK-NEXT: vmv.s.x v10, a0
920+ ; CHECK-NEXT: vmerge.vim v11, v11, 0, v0
921+ ; CHECK-NEXT: vmv.v.v v0, v10
922+ ; CHECK-NEXT: vmerge.vim v12, v11, 2, v0
923+ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
924+ ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
925+ ; CHECK-NEXT: vmv.v.v v8, v10
926+ ; CHECK-NEXT: ret
927+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 0 , i32 0 , i32 0 , i32 1 , i32 1 , i32 1 , i32 2 , i32 2 >
928+ ret <8 x i32 > %out
929+ }
930+
; Single-source "repeat" shuffle on <8 x i32> with a repeat count of 4:
; mask 0,0,0,0,1,1,1,1.
; The expected lowering builds the index vector at e16 with vid.v and vsrl.vi
; by 2 (lane i gets index i >> 2), then uses vrgatherei16.vv at e32/m2.
931+ define <8 x i32 > @shuffle_repeat4_singlesrc_e32 (<8 x i32 > %v ) {
932+ ; CHECK-LABEL: shuffle_repeat4_singlesrc_e32:
933+ ; CHECK: # %bb.0:
934+ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
935+ ; CHECK-NEXT: vid.v v10
936+ ; CHECK-NEXT: vsrl.vi v12, v10, 2
937+ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
938+ ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
939+ ; CHECK-NEXT: vmv.v.v v8, v10
940+ ; CHECK-NEXT: ret
941+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 0 , i32 0 , i32 0 , i32 0 , i32 1 , i32 1 , i32 1 , i32 1 >
942+ ret <8 x i32 > %out
943+ }
0 commit comments