diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1b29f9bdc0d25..d2d79e5d831a6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5805,6 +5805,7 @@ bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
   if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
     return true;
 
+  const unsigned NumElts = M.size();
   MVT SVT = VT.getSimpleVT();
 
   // Not for i1 vectors.
@@ -5812,7 +5813,8 @@ bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
     return false;
 
   int Dummy1, Dummy2;
-  return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
+  return ShuffleVectorInst::isReverseMask(M, NumElts) ||
+         (isElementRotate(Dummy1, Dummy2, M) > 0) ||
          isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
 }
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
index 71a15077be6eb..ddbf976553c21 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
@@ -839,14 +839,13 @@ define <4 x i8> @reverse_v4i8_2(<2 x i8> %a, <2 x i8> %b) {
 define <8 x i8> @reverse_v8i8_2(<4 x i8> %a, <4 x i8> %b) {
 ; CHECK-LABEL: reverse_v8i8_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vid.v v11
-; CHECK-NEXT:    vrsub.vi v12, v11, 7
-; CHECK-NEXT:    vrgather.vv v10, v8, v12
-; CHECK-NEXT:    vmv.v.i v0, 15
-; CHECK-NEXT:    vrsub.vi v8, v11, 3
-; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vrsub.vi v10, v10, 3
+; CHECK-NEXT:    vrgather.vv v11, v8, v10
+; CHECK-NEXT:    vrgather.vv v8, v9, v10
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v11, 4
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32>
   ret <8 x i8> %res
@@ -855,17 +854,13 @@ define <8 x i8> @reverse_v8i8_2(<4 x i8> %a, <4 x i8> %b) {
 define <16 x i8> @reverse_v16i8_2(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-LABEL: reverse_v16i8_2:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vrsub.vi v10, v10, 7
+; CHECK-NEXT:    vrgather.vv v11, v8, v10
+; CHECK-NEXT:    vrgather.vv v8, v9, v10
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vid.v v11
-; CHECK-NEXT:    li a0, 255
-; CHECK-NEXT:    vrsub.vi v12, v11, 15
-; CHECK-NEXT:    vrgather.vv v10, v8, v12
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vrsub.vi v8, v11, 7
-; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vslideup.vi v8, v11, 8
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32>
   ret <16 x i8> %res
@@ -874,30 +869,14 @@ define <16 x i8> @reverse_v16i8_2(<8 x i8> %a, <8 x i8> %b) {
 define <32 x i8> @reverse_v32i8_2(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: reverse_v32i8_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    addi a2, a0, -1
-; CHECK-NEXT:    vrsub.vx v10, v10, a2
-; CHECK-NEXT:    lui a2, 16
-; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vrgatherei16.vv v15, v8, v10
-; CHECK-NEXT:    vrgatherei16.vv v14, v12, v10
-; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    vrsub.vi v10, v10, 15
-; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vrgather.vv v17, v13, v10
-; CHECK-NEXT:    vrgather.vv v16, v9, v10
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.s.x v0, a2
-; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    addi a0, a0, -32
-; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v8, v14, a0
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    vrgather.vv v12, v8, v10
+; CHECK-NEXT:    vrgather.vv v8, v9, v10
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v12, 16
 ; CHECK-NEXT:    ret
   %res = shufflevector <16 x i8> %a, <16 x i8> %b, <32 x i32>
   ret <32 x i8> %res
@@ -930,14 +909,13 @@ define <4 x i16> @reverse_v4i16_2(<2 x i16> %a, <2 x i16> %b) {
 define <8 x i16> @reverse_v8i16_2(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: reverse_v8i16_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT:    vid.v v11
-; CHECK-NEXT:    vrsub.vi v12, v11, 7
-; CHECK-NEXT:    vrgather.vv v10, v8, v12
-; CHECK-NEXT:    vmv.v.i v0, 15
-; CHECK-NEXT:    vrsub.vi v8, v11, 3
-; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vrsub.vi v10, v10, 3
+; CHECK-NEXT:    vrgather.vv v11, v8, v10
+; CHECK-NEXT:    vrgather.vv v8, v9, v10
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v11, 4
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32>
   ret <8 x i16> %res
@@ -946,25 +924,13 @@ define <8 x i16> @reverse_v8i16_2(<4 x i16> %a, <4 x i16> %b) {
 define <16 x i16> @reverse_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: reverse_v16i16_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    vrsub.vi v10, v10, 7
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vrgather.vv v13, v12, v10
-; CHECK-NEXT:    vrgather.vv v12, v9, v10
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    srli a1, a0, 1
-; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    vrsub.vx v9, v9, a1
-; CHECK-NEXT:    li a1, 255
-; CHECK-NEXT:    addi a0, a0, -16
-; CHECK-NEXT:    vrgather.vv v15, v8, v9
-; CHECK-NEXT:    vrgather.vv v14, v10, v9
-; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    vrgather.vv v12, v8, v10
+; CHECK-NEXT:    vrgather.vv v8, v9, v10
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v8, v14, a0
-; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    vslideup.vi v8, v12, 8
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32>
   ret <16 x i16> %res
@@ -973,30 +939,23 @@ define <16 x i16> @reverse_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
 define <32 x i16> @reverse_v32i16_2(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-LABEL: reverse_v32i16_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    lui a1, 16
-; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vid.v v12
 ; CHECK-NEXT:    srli a1, a0, 1
+; CHECK-NEXT:    addi a0, a0, -16
 ; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vrsub.vx v10, v10, a1
-; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    vrgather.vv v19, v8, v10
-; CHECK-NEXT:    vrgather.vv v18, v9, v10
-; CHECK-NEXT:    vrgather.vv v16, v11, v10
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    addi a0, a0, -32
-; CHECK-NEXT:    vrsub.vi v20, v8, 15
-; CHECK-NEXT:    vmv1r.v v17, v16
+; CHECK-NEXT:    vrsub.vx v12, v12, a1
+; CHECK-NEXT:    vrgather.vv v15, v8, v12
+; CHECK-NEXT:    vrgather.vv v17, v10, v12
+; CHECK-NEXT:    vrgather.vv v14, v9, v12
+; CHECK-NEXT:    vrgather.vv v16, v11, v12
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vslidedown.vx v12, v14, a0
 ; CHECK-NEXT:    vslidedown.vx v8, v16, a0
-; CHECK-NEXT:    vrgather.vv v8, v12, v20, v0.t
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v12, 16
 ; CHECK-NEXT:    ret
   %res = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32>
   ret <32 x i16> %res
@@ -1029,27 +988,13 @@ define <4 x i32> @reverse_v4i32_2(<2 x i32> %a, < 2 x i32> %b) {
 define <8 x i32> @reverse_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: reverse_v8i32_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vid.v v12
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vrsub.vi v10, v10, 3
-; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vrgatherei16.vv v15, v11, v10
-; CHECK-NEXT:    vrgatherei16.vv v14, v9, v10
-; CHECK-NEXT:    srli a1, a0, 2
-; CHECK-NEXT:    srli a0, a0, 1
-; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    addi a0, a0, -8
-; CHECK-NEXT:    vrsub.vx v10, v12, a1
-; CHECK-NEXT:    vrgather.vv v13, v8, v10
-; CHECK-NEXT:    vrgather.vv v12, v9, v10
-; CHECK-NEXT:    vmv.v.i v0, 15
+; CHECK-NEXT:    vrgather.vv v12, v8, v10
+; CHECK-NEXT:    vrgather.vv v8, v9, v10
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v8, v12, a0
-; CHECK-NEXT:    vmerge.vvm v8, v8, v14, v0
+; CHECK-NEXT:    vslideup.vi v8, v12, 4
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32>
   ret <8 x i32> %res
@@ -1058,26 +1003,23 @@ define <8 x i32> @reverse_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
 define <16 x i32> @reverse_v16i32_2(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK-LABEL: reverse_v16i32_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vid.v v12
 ; CHECK-NEXT:    srli a1, a0, 2
+; CHECK-NEXT:    srli a0, a0, 1
 ; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    vrsub.vx v14, v10, a1
-; CHECK-NEXT:    vrgather.vv v11, v8, v14
-; CHECK-NEXT:    vrgather.vv v10, v9, v14
-; CHECK-NEXT:    vrgather.vv v8, v9, v14
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vid.v v14
-; CHECK-NEXT:    li a1, 255
-; CHECK-NEXT:    addi a0, a0, -16
-; CHECK-NEXT:    vrsub.vi v16, v14, 7
-; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vmv1r.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vslidedown.vx v8, v8, a0
-; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    addi a0, a0, -8
+; CHECK-NEXT:    vrsub.vx v12, v12, a1
+; CHECK-NEXT:    vrgather.vv v15, v8, v12
+; CHECK-NEXT:    vrgather.vv v17, v10, v12
+; CHECK-NEXT:    vrgather.vv v14, v9, v12
+; CHECK-NEXT:    vrgather.vv v16, v11, v12
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vslidedown.vx v12, v14, a0
+; CHECK-NEXT:    vslidedown.vx v8, v16, a0
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v12, 8
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32>
   ret <16 x i32> %res
@@ -1086,32 +1028,27 @@ define <16 x i32> @reverse_v16i32_2(<8 x i32> %a, <8 x i32> %b) {
 define <32 x i32> @reverse_v32i32_2(<16 x i32> %a, <16 x i32> %b) {
 ; CHECK-LABEL: reverse_v32i32_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv4r.v v16, v12
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vid.v v16
 ; CHECK-NEXT:    srli a1, a0, 2
+; CHECK-NEXT:    addi a0, a0, -16
 ; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    vrsub.vx v20, v12, a1
-; CHECK-NEXT:    vrgather.vv v15, v8, v20
-; CHECK-NEXT:    vrgather.vv v14, v9, v20
-; CHECK-NEXT:    vrgather.vv v13, v10, v20
-; CHECK-NEXT:    vrgather.vv v12, v11, v20
-; CHECK-NEXT:    lui a1, 16
-; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    vrgather.vv v8, v9, v20
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vid.v v20
-; CHECK-NEXT:    addi a0, a0, -32
-; CHECK-NEXT:    vmv1r.v v9, v8
-; CHECK-NEXT:    vrsub.vi v24, v20, 15
-; CHECK-NEXT:    vmv2r.v v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT:    vslidedown.vx v8, v8, a0
-; CHECK-NEXT:    vrgatherei16.vv v8, v16, v24, v0.t
+; CHECK-NEXT:    vrsub.vx v24, v16, a1
+; CHECK-NEXT:    vrgather.vv v23, v8, v24
+; CHECK-NEXT:    vrgather.vv v19, v12, v24
+; CHECK-NEXT:    vrgather.vv v22, v9, v24
+; CHECK-NEXT:    vrgather.vv v18, v13, v24
+; CHECK-NEXT:    vrgather.vv v21, v10, v24
+; CHECK-NEXT:    vrgather.vv v17, v14, v24
+; CHECK-NEXT:    vrgather.vv v20, v11, v24
+; CHECK-NEXT:    vrgather.vv v16, v15, v24
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vslidedown.vx v24, v20, a0
+; CHECK-NEXT:    vslidedown.vx v8, v16, a0
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v24, 16
 ; CHECK-NEXT:    ret
   %res = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32>
   ret <32 x i32> %res
@@ -1135,28 +1072,23 @@ define <4 x i64> @reverse_v4i64_2(<2 x i64> %a, < 2 x i64> %b) {
 define <8 x i64> @reverse_v8i64_2(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK-LABEL: reverse_v8i64_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vid.v v12
 ; CHECK-NEXT:    srli a1, a0, 3
+; CHECK-NEXT:    srli a0, a0, 2
 ; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    vrsub.vx v14, v10, a1
-; CHECK-NEXT:    vrgather.vv v11, v8, v14
-; CHECK-NEXT:    vrgather.vv v10, v9, v14
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vid.v v15
-; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vrgather.vv v8, v9, v14
-; CHECK-NEXT:    vmv.v.i v0, 15
-; CHECK-NEXT:    srli a0, a0, 1
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vrsub.vi v16, v15, 3
-; CHECK-NEXT:    addi a0, a0, -8
-; CHECK-NEXT:    vmv1r.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vslidedown.vx v8, v8, a0
-; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    addi a0, a0, -4
+; CHECK-NEXT:    vrsub.vx v12, v12, a1
+; CHECK-NEXT:    vrgather.vv v15, v8, v12
+; CHECK-NEXT:    vrgather.vv v17, v10, v12
+; CHECK-NEXT:    vrgather.vv v14, v9, v12
+; CHECK-NEXT:    vrgather.vv v16, v11, v12
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vslidedown.vx v12, v14, a0
+; CHECK-NEXT:    vslidedown.vx v8, v16, a0
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v12, 4
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32>
   ret <8 x i64> %res
@@ -1189,14 +1121,13 @@ define <4 x half> @reverse_v4f16_2(<2 x half> %a, <2 x half> %b) {
 define <8 x half> @reverse_v8f16_2(<4 x half> %a, <4 x half> %b) {
 ; CHECK-LABEL: reverse_v8f16_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT:    vid.v v11
-; CHECK-NEXT:    vrsub.vi v12, v11, 7
-; CHECK-NEXT:    vrgather.vv v10, v8, v12
-; CHECK-NEXT:    vmv.v.i v0, 15
-; CHECK-NEXT:    vrsub.vi v8, v11, 3
-; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vrsub.vi v10, v10, 3
+; CHECK-NEXT:    vrgather.vv v11, v8, v10
+; CHECK-NEXT:    vrgather.vv v8, v9, v10
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v11, 4
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32>
   ret <8 x half> %res
@@ -1205,25 +1136,13 @@ define <8 x half> @reverse_v8f16_2(<4 x half> %a, <4 x half> %b) {
 define <16 x half> @reverse_v16f16_2(<8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: reverse_v16f16_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    vrsub.vi v10, v10, 7
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vrgather.vv v13, v12, v10
-; CHECK-NEXT:    vrgather.vv v12, v9, v10
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    srli a1, a0, 1
-; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    vrsub.vx v9, v9, a1
-; CHECK-NEXT:    li a1, 255
-; CHECK-NEXT:    addi a0, a0, -16
-; CHECK-NEXT:    vrgather.vv v15, v8, v9
-; CHECK-NEXT:    vrgather.vv v14, v10, v9
-; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    vrgather.vv v12, v8, v10
+; CHECK-NEXT:    vrgather.vv v8, v9, v10
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v8, v14, a0
-; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    vslideup.vi v8, v12, 8
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x half> %a, <8 x half> %b, <16 x i32>
   ret <16 x half> %res
@@ -1279,27 +1198,13 @@ define <4 x float> @reverse_v4f32_2(<2 x float> %a, <2 x float> %b) {
 define <8 x float> @reverse_v8f32_2(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: reverse_v8f32_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vid.v v12
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vrsub.vi v10, v10, 3
-; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vrgatherei16.vv v15, v11, v10
-; CHECK-NEXT:    vrgatherei16.vv v14, v9, v10
-; CHECK-NEXT:    srli a1, a0, 2
-; CHECK-NEXT:    srli a0, a0, 1
-; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    addi a0, a0, -8
-; CHECK-NEXT:    vrsub.vx v10, v12, a1
-; CHECK-NEXT:    vrgather.vv v13, v8, v10
-; CHECK-NEXT:    vrgather.vv v12, v9, v10
-; CHECK-NEXT:    vmv.v.i v0, 15
+; CHECK-NEXT:    vrgather.vv v12, v8, v10
+; CHECK-NEXT:    vrgather.vv v8, v9, v10
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v8, v12, a0
-; CHECK-NEXT:    vmerge.vvm v8, v8, v14, v0
+; CHECK-NEXT:    vslideup.vi v8, v12, 4
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32>
   ret <8 x float> %res
@@ -1308,26 +1213,23 @@ define <8 x float> @reverse_v8f32_2(<4 x float> %a, <4 x float> %b) {
 define <16 x float> @reverse_v16f32_2(<8 x float> %a, <8 x float> %b) {
 ; CHECK-LABEL: reverse_v16f32_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vid.v v12
 ; CHECK-NEXT:    srli a1, a0, 2
+; CHECK-NEXT:    srli a0, a0, 1
 ; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    vrsub.vx v14, v10, a1
-; CHECK-NEXT:    vrgather.vv v11, v8, v14
-; CHECK-NEXT:    vrgather.vv v10, v9, v14
-; CHECK-NEXT:    vrgather.vv v8, v9, v14
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vid.v v14
-; CHECK-NEXT:    li a1, 255
-; CHECK-NEXT:    addi a0, a0, -16
-; CHECK-NEXT:    vrsub.vi v16, v14, 7
-; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vmv1r.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vslidedown.vx v8, v8, a0
-; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    addi a0, a0, -8
+; CHECK-NEXT:    vrsub.vx v12, v12, a1
+; CHECK-NEXT:    vrgather.vv v15, v8, v12
+; CHECK-NEXT:    vrgather.vv v17, v10, v12
+; CHECK-NEXT:    vrgather.vv v14, v9, v12
+; CHECK-NEXT:    vrgather.vv v16, v11, v12
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vslidedown.vx v12, v14, a0
+; CHECK-NEXT:    vslidedown.vx v8, v16, a0
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v12, 8
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32>
   ret <16 x float> %res
@@ -1351,28 +1253,23 @@ define <4 x double> @reverse_v4f64_2(<2 x double> %a, < 2 x double> %b) {
 define <8 x double> @reverse_v8f64_2(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: reverse_v8f64_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vmv2r.v v12, v10
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vid.v v12
 ; CHECK-NEXT:    srli a1, a0, 3
+; CHECK-NEXT:    srli a0, a0, 2
 ; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    vrsub.vx v14, v10, a1
-; CHECK-NEXT:    vrgather.vv v11, v8, v14
-; CHECK-NEXT:    vrgather.vv v10, v9, v14
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vid.v v15
-; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vrgather.vv v8, v9, v14
-; CHECK-NEXT:    vmv.v.i v0, 15
-; CHECK-NEXT:    srli a0, a0, 1
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vrsub.vi v16, v15, 3
-; CHECK-NEXT:    addi a0, a0, -8
-; CHECK-NEXT:    vmv1r.v v9, v8
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vslidedown.vx v8, v8, a0
-; CHECK-NEXT:    vrgatherei16.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    addi a0, a0, -4
+; CHECK-NEXT:    vrsub.vx v12, v12, a1
+; CHECK-NEXT:    vrgather.vv v15, v8, v12
+; CHECK-NEXT:    vrgather.vv v17, v10, v12
+; CHECK-NEXT:    vrgather.vv v14, v9, v12
+; CHECK-NEXT:    vrgather.vv v16, v11, v12
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vslidedown.vx v12, v14, a0
+; CHECK-NEXT:    vslidedown.vx v8, v16, a0
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v12, 4
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32>
   ret <8 x double> %res
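
For context, a minimal LLVM IR sketch of the two-operand reverse shuffle these tests exercise, and which the new ShuffleVectorInst::isReverseMask check in isShuffleMaskLegal reports as legal; the function name and mask constants below are illustrative, not copied from the test file:

define <8 x i8> @reverse_concat_sketch(<4 x i8> %a, <4 x i8> %b) {
  ; Full reverse of the concatenation <a0..a3, b0..b3>: the 8-element mask
  ; <7,6,5,4,3,2,1,0> is exactly the pattern isReverseMask recognizes.
  %res = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i8> %res
}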