diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e197e80bfc00c..004dbcb7a0968 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4824,6 +4824,36 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
   return convertFromScalableVector(VT, Vec, DAG, Subtarget);
 }
 
+// Return true if Mask "spreads" the leading elements of a vector evenly across
+// the result: position i * Factor + Index holds element i (or undef), and all
+// other positions are undef. On success Index holds the offset (Index < Factor).
+// This is the inverse of a deinterleave with the same Factor and Index, and is
+// analogous to an interleave, except that all but one lane is undef.
+static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
+  SmallVector<bool> LaneIsUndef(Factor, true);
+  for (unsigned i = 0; i < Mask.size(); i++)
+    LaneIsUndef[i % Factor] &= (Mask[i] == -1);
+
+  bool Found = false;
+  for (unsigned i = 0; i < Factor; i++) {
+    if (LaneIsUndef[i])
+      continue;
+    if (Found)
+      return false;
+    Index = i;
+    Found = true;
+  }
+  if (!Found)
+    return false;
+
+  for (unsigned i = 0; i < Mask.size() / Factor; i++) {
+    unsigned j = i * Factor + Index;
+    if (Mask[j] != -1 && (unsigned)Mask[j] != i)
+      return false;
+  }
+  return true;
+}
+
 // Given a vector a, b, c, d return a vector Factor times longer
 // with Factor-1 undef's between elements. Ex:
 // a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
@@ -5417,6 +5447,23 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                          DAG.getUNDEF(VT));
     }
 
+    // Match a spread(4,8) which can be done via extend and shift. Spread(2)
+    // is fully covered in interleave(2) above, so it is ignored here.
+ if (VT.getScalarSizeInBits() < Subtarget.getELen()) { + unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits(); + assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8); + for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) { + unsigned Index; + if (isSpreadMask(Mask, Factor, Index)) { + MVT NarrowVT = + MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor); + SDValue Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT, V1, + DAG.getVectorIdxConstant(0, DL)); + return getWideningSpread(Src, Factor, Index, DL, DAG); + } + } + } + if (VT.getScalarSizeInBits() == 8 && any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) { // On such a vector we're unable to use i8 as the index type. diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 6077c6d520fcf..1c6e1a37fa8af 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -879,10 +879,8 @@ define <8 x i32> @shuffle_spread4_singlesrc_e32(<8 x i32> %v) { define <16 x i8> @shuffle_spread4_singlesrc_e8_idx0(<16 x i8> %v) { ; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vsrl.vi v10, v9, 2 -; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf4 v9, v8 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> @@ -892,11 +890,9 @@ define <16 x i8> @shuffle_spread4_singlesrc_e8_idx0(<16 x i8> %v) { define <16 x i8> @shuffle_spread4_singlesrc_e8_idx1(<16 x i8> %v) { ; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vsrl.vi v10, v9, 2 -; CHECK-NEXT: vrgather.vv v9, v8, v10 -; CHECK-NEXT: vmv.v.v v8, v9 +; 
CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf4 v9, v8 +; CHECK-NEXT: vsll.vi v8, v9, 8 ; CHECK-NEXT: ret %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> ret <16 x i8> %out @@ -905,11 +901,9 @@ define <16 x i8> @shuffle_spread4_singlesrc_e8_idx1(<16 x i8> %v) { define <16 x i8> @shuffle_spread4_singlesrc_e8_idx2(<16 x i8> %v) { ; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vsrl.vi v10, v9, 2 -; CHECK-NEXT: vrgather.vv v9, v8, v10 -; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf4 v9, v8 +; CHECK-NEXT: vsll.vi v8, v9, 16 ; CHECK-NEXT: ret %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> ret <16 x i8> %out @@ -918,11 +912,9 @@ define <16 x i8> @shuffle_spread4_singlesrc_e8_idx2(<16 x i8> %v) { define <16 x i8> @shuffle_spread4_singlesrc_e8_idx3(<16 x i8> %v) { ; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx3: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vsrl.vi v10, v9, 2 -; CHECK-NEXT: vrgather.vv v9, v8, v10 -; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf4 v9, v8 +; CHECK-NEXT: vsll.vi v8, v9, 24 ; CHECK-NEXT: ret %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> ret <16 x i8> %out @@ -946,11 +938,8 @@ define <16 x i8> @shuffle_spread4_singlesrc_e8_idx4(<16 x i8> %v) { define <32 x i8> @shuffle_spread8_singlesrc_e8(<32 x i8> %v) { ; CHECK-LABEL: shuffle_spread8_singlesrc_e8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vsrl.vi v12, v10, 3 -; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %out = shufflevector <32 x i8> %v, 
<32 x i8> poison, <32 x i32>