From 86fb7fcdae0a9e1d3b4e15ca6b05299f4dca6060 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 3 Dec 2024 08:44:13 -0800 Subject: [PATCH 1/2] [RISCV] Use zext and shift for spread(4,8) when types allow For a spread with an element type small enough, we can use a zext and shift to perform the shuffle. For e8, this covers spread(2,4,8), and for e16 covers spread(2,4). Note that spread(2) is already covered by the existing interleave logic, and is simply listed for completeness in the prior description. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 47 +++++++++++++++++++ .../RISCV/rvv/fixed-vectors-int-shuffles.ll | 37 +++++---------- 2 files changed, 60 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index e197e80bfc00c..2ad61c8291001 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4824,6 +4824,36 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, return convertFromScalableVector(VT, Vec, DAG, Subtarget); } +// Match a mask which "spreads" the leading elements of a vector evenly +// across the result. Factor is the spread amount, and Index is the +// offset applied. (on success, Index < Factor) This is the inverse +// of a deinterleave with the same Factor and Index. This is analogous +// to a interleave, except that all but one lane is undef. 
+static bool isSpreadMask(ArrayRef Mask, unsigned Factor, unsigned &Index) { + SmallVector LaneIsUndef(Factor, true); + for (unsigned i = 0; i < Mask.size(); i++) + LaneIsUndef[i % Factor] &= (Mask[i] == -1); + + bool Found = false; + for (unsigned i = 0; i < Factor; i++) { + if (LaneIsUndef[i]) + continue; + if (Found) + return false; + Index = i; + Found = true; + } + if (!Found) + return false; + + for (unsigned i = 0; i < Mask.size() / Factor; i++) { + unsigned j = i * Factor + Index; + if (Mask[j] != -1 && (unsigned)Mask[j] != i) + return false; + } + return true; +} + // Given a vector a, b, c, d return a vector Factor times longer // with Factor-1 undef's between elements. Ex: // a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0) @@ -5417,6 +5447,23 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, DAG.getUNDEF(VT)); } + // Match a spread(4,8) which can be done via extend and shift. Spread(2) + // is fully covered in interleave(2) above, so it is ignored here. + if (VT.getScalarSizeInBits() < Subtarget.getELen()) { + unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits(); + assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8); + for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) { + unsigned Index; + if (isSpreadMask(Mask, Factor, Index)) { + MVT NarrowVT = + MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor); + SDValue Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT, V1, + DAG.getVectorIdxConstant(0, DL)); + return getWideningSpread(Src, Factor, Index, DL, DAG); + } + } + } + if (VT.getScalarSizeInBits() == 8 && any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) { // On such a vector we're unable to use i8 as the index type. 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 6077c6d520fcf..1c6e1a37fa8af 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -879,10 +879,8 @@ define <8 x i32> @shuffle_spread4_singlesrc_e32(<8 x i32> %v) { define <16 x i8> @shuffle_spread4_singlesrc_e8_idx0(<16 x i8> %v) { ; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vsrl.vi v10, v9, 2 -; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf4 v9, v8 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> @@ -892,11 +890,9 @@ define <16 x i8> @shuffle_spread4_singlesrc_e8_idx0(<16 x i8> %v) { define <16 x i8> @shuffle_spread4_singlesrc_e8_idx1(<16 x i8> %v) { ; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vsrl.vi v10, v9, 2 -; CHECK-NEXT: vrgather.vv v9, v8, v10 -; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf4 v9, v8 +; CHECK-NEXT: vsll.vi v8, v9, 8 ; CHECK-NEXT: ret %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> ret <16 x i8> %out @@ -905,11 +901,9 @@ define <16 x i8> @shuffle_spread4_singlesrc_e8_idx1(<16 x i8> %v) { define <16 x i8> @shuffle_spread4_singlesrc_e8_idx2(<16 x i8> %v) { ; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vsrl.vi v10, v9, 2 -; CHECK-NEXT: vrgather.vv v9, v8, v10 -; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf4 v9, v8 +; CHECK-NEXT: vsll.vi v8, v9, 16 ; 
CHECK-NEXT: ret %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> ret <16 x i8> %out @@ -918,11 +912,9 @@ define <16 x i8> @shuffle_spread4_singlesrc_e8_idx2(<16 x i8> %v) { define <16 x i8> @shuffle_spread4_singlesrc_e8_idx3(<16 x i8> %v) { ; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx3: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vsrl.vi v10, v9, 2 -; CHECK-NEXT: vrgather.vv v9, v8, v10 -; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vzext.vf4 v9, v8 +; CHECK-NEXT: vsll.vi v8, v9, 24 ; CHECK-NEXT: ret %out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> ret <16 x i8> %out @@ -946,11 +938,8 @@ define <16 x i8> @shuffle_spread4_singlesrc_e8_idx4(<16 x i8> %v) { define <32 x i8> @shuffle_spread8_singlesrc_e8(<32 x i8> %v) { ; CHECK-LABEL: shuffle_spread8_singlesrc_e8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vsrl.vi v12, v10, 3 -; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %out = shufflevector <32 x i8> %v, <32 x i8> poison, <32 x i32> From eff129455dcbac34f1c023ab69d10da6355f5e82 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 5 Dec 2024 16:30:23 -0800 Subject: [PATCH 2/2] typo fix --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 2ad61c8291001..004dbcb7a0968 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4828,7 +4828,7 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, // across the result. Factor is the spread amount, and Index is the // offset applied. 
(on success, Index < Factor) This is the inverse // of a deinterleave with the same Factor and Index. This is analogous -// to a interleave, except that all but one lane is undef. +// to an interleave, except that all but one lane is undef. static bool isSpreadMask(ArrayRef Mask, unsigned Factor, unsigned &Index) { SmallVector LaneIsUndef(Factor, true); for (unsigned i = 0; i < Mask.size(); i++)