diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e86d96bed4120..52dfaccf7098b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4441,48 +4441,58 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
 }
 
 // Is this a shuffle extracts either the even or odd elements of a vector?
-// That is, specifically, either (a) or (b) below.
-// t34: v8i8 = extract_subvector t11, Constant:i64<0>
-// t33: v8i8 = extract_subvector t11, Constant:i64<8>
-// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
-// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
-// Returns {Src Vector, Even Elements} on success
-static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
-                                  SDValue V2, ArrayRef<int> Mask,
-                                  const RISCVSubtarget &Subtarget) {
+// That is, specifically, either (a) or (b) in the options below.
+// Single operand shuffle is easy:
+// a) t35: v8i8 = vector_shuffle<0,2,4,6,u,u,u,u> t34, undef
+// b) t35: v8i8 = vector_shuffle<1,3,5,7,u,u,u,u> t34, undef
+// Double operand shuffle:
+// t34: v8i8 = extract_subvector t11, Constant:i64<0>
+// t33: v8i8 = extract_subvector t11, Constant:i64<8>
+// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
+// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
+static SDValue isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
+                                     SDValue V2, ArrayRef<int> Mask,
+                                     const RISCVSubtarget &Subtarget) {
   // Need to be able to widen the vector.
   if (VT.getScalarSizeInBits() >= Subtarget.getELen())
-    return false;
+    return SDValue();
+
+  // First index must be the first even or odd element from V1.
+  if (Mask[0] != 0 && Mask[0] != 1)
+    return SDValue();
+
+  // The others must increase by 2 each time (or be undef).
+  for (unsigned i = 1; i != Mask.size(); ++i)
+    if (Mask[i] != -1 && Mask[i] != Mask[0] + (int)i * 2)
+      return SDValue();
+
+  if (1 == count_if(Mask, [](int Idx) { return Idx != -1; }))
+    return SDValue();
+
+  if (V2.isUndef() &&
+      RISCVTargetLowering::getLMUL(ContainerVT) != RISCVII::VLMUL::LMUL_8)
+    return V1;
 
   // Both input must be extracts.
   if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
       V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
-    return false;
+    return SDValue();
 
   // Extracting from the same source.
   SDValue Src = V1.getOperand(0);
   if (Src != V2.getOperand(0))
-    return false;
+    return SDValue();
 
   // Src needs to have twice the number of elements.
   if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
-    return false;
+    return SDValue();
 
   // The extracts must extract the two halves of the source.
   if (V1.getConstantOperandVal(1) != 0 ||
       V2.getConstantOperandVal(1) != Mask.size())
-    return false;
-
-  // First index must be the first even or odd element from V1.
-  if (Mask[0] != 0 && Mask[0] != 1)
-    return false;
-
-  // The others must increase by 2 each time (or be undef).
-  for (unsigned i = 1; i != Mask.size(); ++i)
-    if (Mask[i] != -1 && Mask[i] != Mask[0] + (int)i * 2)
-      return false;
+    return SDValue();
 
-  return true;
+  return Src;
 }
 
 /// Is this shuffle interleaving contiguous elements from one vector into the
@@ -4612,7 +4622,8 @@ static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
   assert(Src.getSimpleValueType().isFixedLengthVector());
   ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
 
-  // The source is a vector of type
+  // The source is a vector of type (For the single source
+  // case, the high half is undefined)
   MVT SrcContainerVT =
       MVT::getVectorVT(ContainerVT.getVectorElementType(),
                        ContainerVT.getVectorElementCount() * 2);
@@ -5297,10 +5308,9 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
 
   // If this is a deinterleave and we can widen the vector, then we can use
   // vnsrl to deinterleave.
-  if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
-    return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
-                                   Subtarget, DAG);
-  }
+  if (SDValue Src =
+          isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget))
+    return getDeinterleaveViaVNSRL(DL, VT, Src, Mask[0] == 0, Subtarget, DAG);
 
   if (SDValue V =
           lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
index 1d7496397670f..7ac28f99fe860 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
@@ -9,29 +9,28 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    vlm.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT:    vslidedown.vi v0, v8, 2
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
+; CHECK-NEXT:    vmv.v.i v9, 0
+; CHECK-NEXT:    vmerge.vim v10, v9, 1, v0
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmerge.vim v12, v9, 1, v0
+; CHECK-NEXT:    vnsrl.wi v8, v12, 0
 ; CHECK-NEXT:    vid.v v9
 ; CHECK-NEXT:    vadd.vv v11, v9, v9
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v0, v0, 2
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vrgather.vv v9, v10, v11
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT:    li a0, -256
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vadd.vi v12, v11, -16
-; CHECK-NEXT:    vrgather.vv v9, v8, v12, v0.t
-; CHECK-NEXT:    vmsne.vi v9, v9, 0
-; CHECK-NEXT:    vadd.vi v12, v11, 1
-; CHECK-NEXT:    vrgather.vv v13, v10, v12
-; CHECK-NEXT:    vadd.vi v10, v11, -15
-; CHECK-NEXT:    vrgather.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v13, 0
+; CHECK-NEXT:    vadd.vi v9, v11, -16
+; CHECK-NEXT:    vrgather.vv v8, v10, v9, v0.t
+; CHECK-NEXT:    vmsne.vi v9, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v12, 8
+; CHECK-NEXT:    vadd.vi v11, v11, -15
+; CHECK-NEXT:    vrgather.vv v8, v10, v11, v0.t
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
 ; CHECK-NEXT:    vmv.v.v v0, v9
 ; CHECK-NEXT:    ret
   %vec = load <32 x i1>, ptr %p
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
index 29038df652e71..aec2065905247 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
@@ -101,45 +101,39 @@ define <4 x i32> @v4i32_v16i32(<16 x i32>) {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; RV32-NEXT:    vmv.v.i v12, 1
-; RV32-NEXT:    vmv.v.i v14, 6
+; RV32-NEXT:    vmv.v.i v13, 6
 ; RV32-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
-; RV32-NEXT:    vslideup.vi v14, v12, 1
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vid.v v12
-; RV32-NEXT:    vadd.vv v12, v12, v12
-; RV32-NEXT:    vadd.vi v15, v12, 1
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vrgatherei16.vv v12, v8, v15
+; RV32-NEXT:    vslideup.vi v13, v12, 1
+; RV32-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 8
+; RV32-NEXT:    vmv4r.v v20, v8
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vmv2r.v v22, v14
 ; RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
 ; RV32-NEXT:    vmv.v.i v0, 10
-; RV32-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v8, 8
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; RV32-NEXT:    vrgatherei16.vv v12, v8, v14, v0.t
-; RV32-NEXT:    vmv1r.v v8, v12
+; RV32-NEXT:    vnsrl.wx v8, v20, a0
+; RV32-NEXT:    vrgatherei16.vv v8, v16, v13, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: v4i32_v16i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vid.v v12
-; RV64-NEXT:    vadd.vv v12, v12, v12
-; RV64-NEXT:    vadd.vi v14, v12, 1
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vrgatherei16.vv v12, v8, v14
-; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; RV64-NEXT:    vmv.v.i v0, 10
 ; RV64-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 8
+; RV64-NEXT:    vslidedown.vi v16, v8, 8
+; RV64-NEXT:    vmv4r.v v20, v8
+; RV64-NEXT:    li a0, 32
+; RV64-NEXT:    vmv2r.v v22, v12
+; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT:    vnsrl.wx v8, v20, a0
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.i v0, 10
 ; RV64-NEXT:    li a0, 3
 ; RV64-NEXT:    slli a0, a0, 33
 ; RV64-NEXT:    addi a0, a0, 1
 ; RV64-NEXT:    slli a0, a0, 16
-; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.v.x v10, a0
 ; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; RV64-NEXT:    vrgatherei16.vv v12, v8, v10, v0.t
-; RV64-NEXT:    vmv1r.v v8, v12
+; RV64-NEXT:    vrgatherei16.vv v8, v16, v10, v0.t
 ; RV64-NEXT:    ret
   %2 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32>
   ret <4 x i32> %2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll
index 3af3540e1964b..15c2c2298c0dd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll
@@ -444,10 +444,8 @@ define void @vnsrl_0_i8_single_src(ptr %in, ptr %out) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    vadd.vv v9, v9, v9
-; CHECK-NEXT:    vrgather.vv v10, v8, v9
-; CHECK-NEXT:    vse8.v v10, (a1)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
 entry:
   %0 = load <8 x i8>, ptr %in, align 1
@@ -461,10 +459,8 @@ define void @vnsrl_0_i8_single_src2(ptr %in, ptr %out) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    vadd.vv v9, v9, v9
-; CHECK-NEXT:    vrgather.vv v10, v8, v9
-; CHECK-NEXT:    vse8.v v10, (a1)
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
 entry:
   %0 = load <8 x i8>, ptr %in, align 1
@@ -472,3 +468,24 @@ entry:
   store <8 x i8> %shuffle.i5, ptr %out, align 1
   ret void
 }
+
+; Can't match the m8 result type as the source would have to be m16 which
+; isn't a legal type.
+define void @vnsrl_0_i32_single_src_m8(ptr %in, ptr %out) {
+; CHECK-LABEL: vnsrl_0_i32_single_src_m8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a2, 64
+; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vadd.vv v16, v16, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v24, v8, v16
+; CHECK-NEXT:    vse32.v v24, (a1)
+; CHECK-NEXT:    ret
+entry:
+  %0 = load <64 x i32>, ptr %in, align 4
+  %shuffle.i5 = shufflevector <64 x i32> %0, <64 x i32> poison, <64 x i32>
+  store <64 x i32> %shuffle.i5, ptr %out, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
index b2973826d65de..075e463e41a6b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
@@ -7,28 +7,28 @@
 define {<16 x i1>, <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> %vec) {
 ; CHECK-LABEL: vector_deinterleave_v16i1_v32i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v10, v8, 1, v0
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    vadd.vv v11, v9, v9
+; CHECK-NEXT:    vmv1r.v v8, v0
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v0, v0, 2
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vrgather.vv v9, v10, v11
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    vmv.v.i v9, 0
+; CHECK-NEXT:    vmerge.vim v10, v9, 1, v0
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmerge.vim v12, v9, 1, v0
+; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vadd.vv v11, v9, v9
 ; CHECK-NEXT:    li a0, -256
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vadd.vi v12, v11, -16
-; CHECK-NEXT:    vrgather.vv v9, v8, v12, v0.t
-; CHECK-NEXT:    vmsne.vi v9, v9, 0
-; CHECK-NEXT:    vadd.vi v12, v11, 1
-; CHECK-NEXT:    vrgather.vv v13, v10, v12
-; CHECK-NEXT:    vadd.vi v10, v11, -15
-; CHECK-NEXT:    vrgather.vv v13, v8, v10, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v13, 0
+; CHECK-NEXT:    vadd.vi v9, v11, -16
+; CHECK-NEXT:    vrgather.vv v8, v10, v9, v0.t
+; CHECK-NEXT:    vmsne.vi v9, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v12, 8
+; CHECK-NEXT:    vadd.vi v11, v11, -15
+; CHECK-NEXT:    vrgather.vv v8, v10, v11, v0.t
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
 ; CHECK-NEXT:    vmv.v.v v0, v9
 ; CHECK-NEXT:    ret
   %retval = call {<16 x i1>, <16 x i1>} @llvm.vector.deinterleave2.v32i1(<32 x i1> %vec)
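
Note (not part of the patch): for reference, the shape of single-source shuffle the new path accepts is sketched below in LLVM IR. The function name and mask values are illustrative only, mirroring the vnsrl_0_i8_single_src tests above rather than copying them; an even-element mask (first index 0) lowers to vnsrl.wi with shift 0, an odd-element mask (first index 1) uses a shift equal to the element width (8 for i8), and the container must stay below LMUL 8, as the vnsrl_0_i32_single_src_m8 test shows.

; Hypothetical example, not taken from the test files: keep the even elements
; of a single v8i8 source; the upper half of the result is poison.
define <8 x i8> @deinterleave_evens_sketch(<8 x i8> %src) {
  %even = shufflevector <8 x i8> %src, <8 x i8> poison,
          <8 x i32> <i32 0, i32 2, i32 4, i32 6,
                     i32 poison, i32 poison, i32 poison, i32 poison>
  ret <8 x i8> %even
}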