@@ -810,8 +810,8 @@ define <8 x i32> @shuffle_compress_singlesrc_gaps_e32(<8 x i32> %v) {
810810 ret <8 x i32 > %out
811811}
812812
813- define <8 x i32 > @shuffle_decompress2_singlesrc_e32 (<8 x i32 > %v ) {
814- ; CHECK-LABEL: shuffle_decompress2_singlesrc_e32 :
813+ define <8 x i32 > @shuffle_spread2_singlesrc_e32 (<8 x i32 > %v ) {
814+ ; CHECK-LABEL: shuffle_spread2_singlesrc_e32 :
815815; CHECK: # %bb.0:
816816; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
817817; CHECK-NEXT: vwaddu.vv v10, v8, v8
@@ -823,18 +823,46 @@ define <8 x i32> @shuffle_decompress2_singlesrc_e32(<8 x i32> %v) {
823823 ret <8 x i32 > %out
824824}
825825
826- define <8 x i32 > @shuffle_decompress3_singlesrc_e32 (<8 x i32 > %v ) {
827- ; RV32-LABEL: shuffle_decompress3_singlesrc_e32:
; Single-source shuffle whose mask places source elements 0..3 in result
; lanes 1, 3, 5 and 7; every even result lane is undef.
; The CHECK lines below expect a vwaddu.vv followed by a vwmaccu.vx with
; a0 = -1 and a vmv2r.v copy; no vrgather appears in the expected output.
826+ define <8 x i32 > @shuffle_spread2_singlesrc_e32_index1 (<8 x i32 > %v ) {
827+ ; CHECK-LABEL: shuffle_spread2_singlesrc_e32_index1:
828+ ; CHECK: # %bb.0:
829+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
830+ ; CHECK-NEXT: vwaddu.vv v10, v8, v8
831+ ; CHECK-NEXT: li a0, -1
832+ ; CHECK-NEXT: vwmaccu.vx v10, a0, v8
833+ ; CHECK-NEXT: vmv2r.v v8, v10
834+ ; CHECK-NEXT: ret
835+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 undef , i32 0 , i32 undef , i32 1 , i32 undef , i32 2 , i32 undef , i32 3 >
836+ ret <8 x i32 > %out
837+ }
838+
; Single-source shuffle whose mask places source elements 0, 1 and 2 in
; result lanes 2, 4 and 6; lanes 0, 1, 3, 5 and 7 are undef.
; The CHECK lines below expect an index vector built with vid.v,
; vsrl.vi by 1 and vadd.vi -1, which then feeds a vrgatherei16.vv.
; NOTE(review): the starting offset (2) equals the stride (2) here, which is
; presumably why the expected code is a gather rather than the widening
; sequence -- confirm against the lowering code.
839+ define <8 x i32 > @shuffle_spread2_singlesrc_e32_index2 (<8 x i32 > %v ) {
840+ ; CHECK-LABEL: shuffle_spread2_singlesrc_e32_index2:
841+ ; CHECK: # %bb.0:
842+ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
843+ ; CHECK-NEXT: vid.v v10
844+ ; CHECK-NEXT: vsrl.vi v10, v10, 1
845+ ; CHECK-NEXT: vadd.vi v12, v10, -1
846+ ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
847+ ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
848+ ; CHECK-NEXT: vmv.v.v v8, v10
849+ ; CHECK-NEXT: ret
850+ %out = shufflevector <8 x i32 > %v , <8 x i32 > poison, <8 x i32 > <i32 undef , i32 undef , i32 0 , i32 undef , i32 1 , i32 undef , i32 2 , i32 undef >
851+ ret <8 x i32 > %out
852+ }
853+
854+ define <8 x i32 > @shuffle_spread3_singlesrc_e32 (<8 x i32 > %v ) {
855+ ; RV32-LABEL: shuffle_spread3_singlesrc_e32:
828856; RV32: # %bb.0:
829- ; RV32-NEXT: lui a0, %hi(.LCPI55_0 )
830- ; RV32-NEXT: addi a0, a0, %lo(.LCPI55_0 )
857+ ; RV32-NEXT: lui a0, %hi(.LCPI57_0 )
858+ ; RV32-NEXT: addi a0, a0, %lo(.LCPI57_0 )
831859; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
832860; RV32-NEXT: vle16.v v12, (a0)
833861; RV32-NEXT: vrgatherei16.vv v10, v8, v12
834862; RV32-NEXT: vmv.v.v v8, v10
835863; RV32-NEXT: ret
836864;
837- ; RV64-LABEL: shuffle_decompress3_singlesrc_e32 :
865+ ; RV64-LABEL: shuffle_spread3_singlesrc_e32 :
838866; RV64: # %bb.0:
839867; RV64-NEXT: lui a0, 32769
840868; RV64-NEXT: slli a0, a0, 21
@@ -849,8 +877,8 @@ define <8 x i32> @shuffle_decompress3_singlesrc_e32(<8 x i32> %v) {
849877}
850878
851879; TODO: This should be a single vslideup.vi
852- define <8 x i32 > @shuffle_decompress4_singlesrc_e32 (<8 x i32 > %v ) {
853- ; CHECK-LABEL: shuffle_decompress4_singlesrc_e32 :
880+ define <8 x i32 > @shuffle_spread4_singlesrc_e32 (<8 x i32 > %v ) {
881+ ; CHECK-LABEL: shuffle_spread4_singlesrc_e32 :
854882; CHECK: # %bb.0:
855883; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
856884; CHECK-NEXT: vid.v v10
@@ -864,8 +892,8 @@ define <8 x i32> @shuffle_decompress4_singlesrc_e32(<8 x i32> %v) {
864892}
865893
866894; TODO: This should be either a single vslideup.vi or two widening interleaves.
867- define <8 x i8 > @shuffle_decompress4_singlesrc_e8 (<8 x i8 > %v ) {
868- ; CHECK-LABEL: shuffle_decompress4_singlesrc_e8 :
895+ define <8 x i8 > @shuffle_spread4_singlesrc_e8 (<8 x i8 > %v ) {
896+ ; CHECK-LABEL: shuffle_spread4_singlesrc_e8 :
869897; CHECK: # %bb.0:
870898; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
871899; CHECK-NEXT: vid.v v9
@@ -877,11 +905,25 @@ define <8 x i8> @shuffle_decompress4_singlesrc_e8(<8 x i8> %v) {
877905 ret <8 x i8 > %out
878906}
879907
; Single-source shuffle on <32 x i8> whose mask places source elements 0..3
; in result lanes 0, 8, 16 and 24; every other result lane is undef.
; The CHECK lines below expect an index vector built with vid.v and
; vsrl.vi by 3, which then feeds a vrgather.vv at e8/m2 with VL = 32.
908+ define <32 x i8 > @shuffle_spread8_singlesrc_e8 (<32 x i8 > %v ) {
909+ ; CHECK-LABEL: shuffle_spread8_singlesrc_e8:
910+ ; CHECK: # %bb.0:
911+ ; CHECK-NEXT: li a0, 32
912+ ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
913+ ; CHECK-NEXT: vid.v v10
914+ ; CHECK-NEXT: vsrl.vi v12, v10, 3
915+ ; CHECK-NEXT: vrgather.vv v10, v8, v12
916+ ; CHECK-NEXT: vmv.v.v v8, v10
917+ ; CHECK-NEXT: ret
918+ %out = shufflevector <32 x i8 > %v , <32 x i8 > poison, <32 x i32 > <i32 0 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 1 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 2 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 3 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef >
919+ ret <32 x i8 > %out
920+ }
921+
880922define <8 x i32 > @shuffle_decompress_singlesrc_e32 (<8 x i32 > %v ) {
881923; CHECK-LABEL: shuffle_decompress_singlesrc_e32:
882924; CHECK: # %bb.0:
883- ; CHECK-NEXT: lui a0, %hi(.LCPI58_0 )
884- ; CHECK-NEXT: addi a0, a0, %lo(.LCPI58_0 )
925+ ; CHECK-NEXT: lui a0, %hi(.LCPI61_0 )
926+ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI61_0 )
885927; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
886928; CHECK-NEXT: vle16.v v12, (a0)
887929; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
0 commit comments