From 115aeb0bb136a1e6dda2730ff0d497a133c1e0e2 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 5 Feb 2025 14:20:51 -0800 Subject: [PATCH 1/2] [RISCV] Make single source reverse legal in isShuffleMaskLegal This enables DAG combines to form this mask. Reverse is generally linear in LMUL so this is reasonable, and results in better codegen for the 2 source variants. For <= m1, the change is only slightly profitable if at all. We trade some mask creation and an extract vrsub for a vslideup.vi. This is likely roughly neutral. At >= m2, this is distinctly profitable as generic DAG pushes the reverse into the two operands. We effectively already did this for one operand, but the other was hitting a full O(LMUL^2) shuffle. Moving that to be O(LMUL/2) operation is a big win. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 5 + .../rvv/fixed-vectors-shuffle-reverse.ll | 375 +++++++----------- 2 files changed, 141 insertions(+), 239 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 1b29f9bdc0d25..3cdfa918771c5 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -5811,6 +5811,11 @@ bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef M, EVT VT) const { if (SVT.getScalarType() == MVT::i1) return false; + unsigned NumElts = M.size(); + if (ShuffleVectorInst::isReverseMask(M, NumElts) && + ShuffleVectorInst::isSingleSourceMask(M, NumElts)) + return true; + int Dummy1, Dummy2; return (isElementRotate(Dummy1, Dummy2, M) > 0) || isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll index 71a15077be6eb..ddbf976553c21 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll @@ -839,14 +839,13 @@ define <4 x i8> @reverse_v4i8_2(<2 x i8> %a, <2 x i8> %b) { define <8 x i8> @reverse_v8i8_2(<4 x i8> %a, <4 x i8> %b) { ; CHECK-LABEL: reverse_v8i8_2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vid.v v11 -; CHECK-NEXT: vrsub.vi v12, v11, 7 -; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vmv.v.i v0, 15 -; CHECK-NEXT: vrsub.vi v8, v11, 3 -; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t -; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vi v10, v10, 3 +; CHECK-NEXT: vrgather.vv v11, v8, v10 +; CHECK-NEXT: vrgather.vv v8, v9, v10 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v11, 4 ; CHECK-NEXT: ret %res = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> ret <8 x i8> %res @@ -855,17 +854,13 @@ define <8 x i8> @reverse_v8i8_2(<4 x i8> %a, <4 x i8> %b) { define <16 x i8> @reverse_v16i8_2(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: reverse_v16i8_2: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vi v10, v10, 7 +; CHECK-NEXT: vrgather.vv v11, v8, v10 +; CHECK-NEXT: vrgather.vv v8, v9, v10 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vid.v v11 -; CHECK-NEXT: li a0, 255 -; CHECK-NEXT: vrsub.vi v12, v11, 15 -; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vrsub.vi v8, v11, 7 -; CHECK-NEXT: vrgather.vv v10, v9, 
v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: vslideup.vi v8, v11, 8 ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> ret <16 x i8> %res @@ -874,30 +869,14 @@ define <16 x i8> @reverse_v16i8_2(<8 x i8> %a, <8 x i8> %b) { define <32 x i8> @reverse_v32i8_2(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: reverse_v32i8_2: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: addi a2, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v10, a2 -; CHECK-NEXT: lui a2, 16 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vrgatherei16.vv v15, v8, v10 -; CHECK-NEXT: vrgatherei16.vv v14, v12, v10 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: vrsub.vi v10, v10, 15 -; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vrgather.vv v17, v13, v10 -; CHECK-NEXT: vrgather.vv v16, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.s.x v0, a2 -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v14, a0 -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vrgather.vv v12, v8, v10 +; CHECK-NEXT: vrgather.vv v8, v9, v10 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v12, 16 ; CHECK-NEXT: ret %res = shufflevector <16 x i8> %a, <16 x i8> %b, <32 x i32> ret <32 x i8> %res @@ -930,14 +909,13 @@ define <4 x i16> @reverse_v4i16_2(<2 x i16> %a, <2 x i16> %b) { define <8 x i16> @reverse_v8i16_2(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: reverse_v8i16_2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vid.v v11 -; CHECK-NEXT: vrsub.vi v12, v11, 7 -; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vmv.v.i v0, 15 -; CHECK-NEXT: vrsub.vi v8, v11, 3 -; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vi v10, v10, 3 +; CHECK-NEXT: vrgather.vv v11, v8, v10 +; CHECK-NEXT: vrgather.vv v8, v9, v10 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v11, 4 ; CHECK-NEXT: ret %res = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> ret <8 x i16> %res @@ -946,25 +924,13 @@ define <8 x i16> @reverse_v8i16_2(<4 x i16> %a, <4 x i16> %b) { define <16 x i16> @reverse_v16i16_2(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: reverse_v16i16_2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: vrsub.vi v10, v10, 7 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vrgather.vv v13, v12, v10 -; CHECK-NEXT: vrgather.vv v12, v9, v10 -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: srli a1, a0, 1 -; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: vrsub.vx v9, v9, a1 -; CHECK-NEXT: li a1, 255 -; CHECK-NEXT: addi a0, a0, -16 -; CHECK-NEXT: vrgather.vv v15, v8, v9 -; CHECK-NEXT: vrgather.vv v14, v10, v9 -; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vrgather.vv v12, v8, v10 +; CHECK-NEXT: vrgather.vv v8, v9, v10 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v14, a0 -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: vslideup.vi v8, v12, 8 ; CHECK-NEXT: ret 
%res = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> ret <16 x i16> %res @@ -973,30 +939,23 @@ define <16 x i16> @reverse_v16i16_2(<8 x i16> %a, <8 x i16> %b) { define <32 x i16> @reverse_v32i16_2(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: reverse_v32i16_2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: lui a1, 16 -; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: srli a1, a0, 1 +; CHECK-NEXT: addi a0, a0, -16 ; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vrsub.vx v10, v10, a1 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: vrgather.vv v19, v8, v10 -; CHECK-NEXT: vrgather.vv v18, v9, v10 -; CHECK-NEXT: vrgather.vv v16, v11, v10 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: addi a0, a0, -32 -; CHECK-NEXT: vrsub.vi v20, v8, 15 -; CHECK-NEXT: vmv1r.v v17, v16 +; CHECK-NEXT: vrsub.vx v12, v12, a1 +; CHECK-NEXT: vrgather.vv v15, v8, v12 +; CHECK-NEXT: vrgather.vv v17, v10, v12 +; CHECK-NEXT: vrgather.vv v14, v9, v12 +; CHECK-NEXT: vrgather.vv v16, v11, v12 +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vslidedown.vx v12, v14, a0 ; CHECK-NEXT: vslidedown.vx v8, v16, a0 -; CHECK-NEXT: vrgather.vv v8, v12, v20, v0.t +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vslideup.vi v8, v12, 16 ; CHECK-NEXT: ret %res = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> ret <32 x i16> %res @@ -1029,27 +988,13 @@ define <4 x i32> @reverse_v4i32_2(<2 x i32> %a, < 2 x i32> %b) { define <8 x i32> @reverse_v8i32_2(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: reverse_v8i32_2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vrsub.vi v10, v10, 3 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vrgatherei16.vv v15, v11, v10 -; CHECK-NEXT: vrgatherei16.vv v14, v9, v10 -; CHECK-NEXT: srli a1, a0, 2 -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: addi a0, a0, -8 -; CHECK-NEXT: vrsub.vx v10, v12, a1 -; CHECK-NEXT: vrgather.vv v13, v8, v10 -; CHECK-NEXT: vrgather.vv v12, v9, v10 -; CHECK-NEXT: vmv.v.i v0, 15 +; CHECK-NEXT: vrgather.vv v12, v8, v10 +; CHECK-NEXT: vrgather.vv v8, v9, v10 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v12, a0 -; CHECK-NEXT: vmerge.vvm v8, v8, v14, v0 +; CHECK-NEXT: vslideup.vi v8, v12, 4 ; CHECK-NEXT: ret %res = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> ret <8 x i32> %res @@ -1058,26 +1003,23 @@ define <8 x i32> @reverse_v8i32_2(<4 x i32> %a, <4 x i32> %b) { define <16 x i32> @reverse_v16i32_2(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: reverse_v16i32_2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: vrsub.vx v14, v10, a1 
-; CHECK-NEXT: vrgather.vv v11, v8, v14 -; CHECK-NEXT: vrgather.vv v10, v9, v14 -; CHECK-NEXT: vrgather.vv v8, v9, v14 -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vid.v v14 -; CHECK-NEXT: li a1, 255 -; CHECK-NEXT: addi a0, a0, -16 -; CHECK-NEXT: vrsub.vi v16, v14, 7 -; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vmv1r.v v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vrgatherei16.vv v8, v12, v16, v0.t +; CHECK-NEXT: addi a0, a0, -8 +; CHECK-NEXT: vrsub.vx v12, v12, a1 +; CHECK-NEXT: vrgather.vv v15, v8, v12 +; CHECK-NEXT: vrgather.vv v17, v10, v12 +; CHECK-NEXT: vrgather.vv v14, v9, v12 +; CHECK-NEXT: vrgather.vv v16, v11, v12 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vslidedown.vx v12, v14, a0 +; CHECK-NEXT: vslidedown.vx v8, v16, a0 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vslideup.vi v8, v12, 8 ; CHECK-NEXT: ret %res = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> ret <16 x i32> %res @@ -1086,32 +1028,27 @@ define <16 x i32> @reverse_v16i32_2(<8 x i32> %a, <8 x i32> %b) { define <32 x i32> @reverse_v32i32_2(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: reverse_v32i32_2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv4r.v v16, v12 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: addi a0, a0, -16 ; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: vrsub.vx v20, v12, a1 -; CHECK-NEXT: vrgather.vv v15, v8, v20 -; CHECK-NEXT: vrgather.vv v14, v9, v20 -; CHECK-NEXT: vrgather.vv v13, v10, v20 -; CHECK-NEXT: vrgather.vv v12, v11, v20 -; CHECK-NEXT: lui a1, 16 -; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: vrgather.vv v8, v9, v20 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vid.v v20 -; CHECK-NEXT: addi a0, a0, -32 -; CHECK-NEXT: vmv1r.v v9, v8 -; CHECK-NEXT: vrsub.vi v24, v20, 15 -; CHECK-NEXT: vmv2r.v v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vrgatherei16.vv v8, v16, v24, v0.t +; CHECK-NEXT: vrsub.vx v24, v16, a1 +; CHECK-NEXT: vrgather.vv v23, v8, v24 +; CHECK-NEXT: vrgather.vv v19, v12, v24 +; CHECK-NEXT: vrgather.vv v22, v9, v24 +; CHECK-NEXT: vrgather.vv v18, v13, v24 +; CHECK-NEXT: vrgather.vv v21, v10, v24 +; CHECK-NEXT: vrgather.vv v17, v14, v24 +; CHECK-NEXT: vrgather.vv v20, v11, v24 +; CHECK-NEXT: vrgather.vv v16, v15, v24 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vslidedown.vx v24, v20, a0 +; CHECK-NEXT: vslidedown.vx v8, v16, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vslideup.vi v8, v24, 16 ; CHECK-NEXT: ret %res = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> ret <32 x i32> %res @@ -1135,28 +1072,23 @@ define <4 x i64> @reverse_v4i64_2(<2 x i64> %a, < 2 x i64> %b) { define <8 x i64> @reverse_v8i64_2(<4 x i64> %a, <4 x i64> %b) { ; CHECK-LABEL: reverse_v8i64_2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: srli a1, a0, 3 +; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: vrsub.vx v14, v10, a1 -; CHECK-NEXT: vrgather.vv v11, v8, 
v14 -; CHECK-NEXT: vrgather.vv v10, v9, v14 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vid.v v15 -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-NEXT: vrgather.vv v8, v9, v14 -; CHECK-NEXT: vmv.v.i v0, 15 -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vrsub.vi v16, v15, 3 -; CHECK-NEXT: addi a0, a0, -8 -; CHECK-NEXT: vmv1r.v v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vrgatherei16.vv v8, v12, v16, v0.t +; CHECK-NEXT: addi a0, a0, -4 +; CHECK-NEXT: vrsub.vx v12, v12, a1 +; CHECK-NEXT: vrgather.vv v15, v8, v12 +; CHECK-NEXT: vrgather.vv v17, v10, v12 +; CHECK-NEXT: vrgather.vv v14, v9, v12 +; CHECK-NEXT: vrgather.vv v16, v11, v12 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vslidedown.vx v12, v14, a0 +; CHECK-NEXT: vslidedown.vx v8, v16, a0 +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vslideup.vi v8, v12, 4 ; CHECK-NEXT: ret %res = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32> ret <8 x i64> %res @@ -1189,14 +1121,13 @@ define <4 x half> @reverse_v4f16_2(<2 x half> %a, <2 x half> %b) { define <8 x half> @reverse_v8f16_2(<4 x half> %a, <4 x half> %b) { ; CHECK-LABEL: reverse_v8f16_2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vid.v v11 -; CHECK-NEXT: vrsub.vi v12, v11, 7 -; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vmv.v.i v0, 15 -; CHECK-NEXT: vrsub.vi v8, v11, 3 -; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vi v10, v10, 3 +; CHECK-NEXT: vrgather.vv v11, v8, v10 +; CHECK-NEXT: vrgather.vv v8, v9, v10 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v11, 4 ; CHECK-NEXT: ret %res = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> ret <8 x half> %res @@ -1205,25 +1136,13 @@ define <8 x half> @reverse_v8f16_2(<4 x half> %a, <4 x half> %b) { define <16 x half> @reverse_v16f16_2(<8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: reverse_v16f16_2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: vrsub.vi v10, v10, 7 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vrgather.vv v13, v12, v10 -; CHECK-NEXT: vrgather.vv v12, v9, v10 -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: srli a1, a0, 1 -; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: vrsub.vx v9, v9, a1 -; CHECK-NEXT: li a1, 255 -; CHECK-NEXT: addi a0, a0, -16 -; CHECK-NEXT: vrgather.vv v15, v8, v9 -; CHECK-NEXT: vrgather.vv v14, v10, v9 -; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vrgather.vv v12, v8, v10 +; CHECK-NEXT: vrgather.vv v8, v9, v10 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v14, a0 -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: vslideup.vi v8, v12, 8 ; CHECK-NEXT: ret %res = shufflevector <8 x half> %a, <8 x half> %b, <16 x i32> ret <16 x half> %res @@ -1279,27 +1198,13 @@ define <4 x float> @reverse_v4f32_2(<2 x float> %a, <2 x float> %b) { define <8 x float> @reverse_v8f32_2(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: reverse_v8f32_2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma 
-; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vrsub.vi v10, v10, 3 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vrgatherei16.vv v15, v11, v10 -; CHECK-NEXT: vrgatherei16.vv v14, v9, v10 -; CHECK-NEXT: srli a1, a0, 2 -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: addi a0, a0, -8 -; CHECK-NEXT: vrsub.vx v10, v12, a1 -; CHECK-NEXT: vrgather.vv v13, v8, v10 -; CHECK-NEXT: vrgather.vv v12, v9, v10 -; CHECK-NEXT: vmv.v.i v0, 15 +; CHECK-NEXT: vrgather.vv v12, v8, v10 +; CHECK-NEXT: vrgather.vv v8, v9, v10 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v12, a0 -; CHECK-NEXT: vmerge.vvm v8, v8, v14, v0 +; CHECK-NEXT: vslideup.vi v8, v12, 4 ; CHECK-NEXT: ret %res = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> ret <8 x float> %res @@ -1308,26 +1213,23 @@ define <8 x float> @reverse_v8f32_2(<4 x float> %a, <4 x float> %b) { define <16 x float> @reverse_v16f32_2(<8 x float> %a, <8 x float> %b) { ; CHECK-LABEL: reverse_v16f32_2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: vrsub.vx v14, v10, a1 -; CHECK-NEXT: vrgather.vv v11, v8, v14 -; CHECK-NEXT: vrgather.vv v10, v9, v14 -; CHECK-NEXT: vrgather.vv v8, v9, v14 -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vid.v v14 -; CHECK-NEXT: li a1, 255 -; CHECK-NEXT: addi a0, a0, -16 -; CHECK-NEXT: vrsub.vi v16, v14, 7 -; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vmv1r.v v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vrgatherei16.vv v8, v12, v16, v0.t +; CHECK-NEXT: addi a0, a0, -8 +; CHECK-NEXT: vrsub.vx v12, v12, a1 +; CHECK-NEXT: vrgather.vv v15, v8, v12 +; CHECK-NEXT: vrgather.vv v17, v10, v12 +; CHECK-NEXT: vrgather.vv v14, v9, v12 +; CHECK-NEXT: vrgather.vv v16, v11, v12 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vslidedown.vx v12, v14, a0 +; CHECK-NEXT: vslidedown.vx v8, v16, a0 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vslideup.vi v8, v12, 8 ; CHECK-NEXT: ret %res = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> ret <16 x float> %res @@ -1351,28 +1253,23 @@ define <4 x double> @reverse_v4f64_2(<2 x double> %a, < 2 x double> %b) { define <8 x double> @reverse_v8f64_2(<4 x double> %a, <4 x double> %b) { ; CHECK-LABEL: reverse_v8f64_2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: srli a1, a0, 3 +; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: vrsub.vx v14, v10, a1 -; CHECK-NEXT: vrgather.vv v11, v8, v14 -; CHECK-NEXT: vrgather.vv v10, v9, v14 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vid.v v15 -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-NEXT: vrgather.vv v8, v9, v14 -; CHECK-NEXT: vmv.v.i v0, 15 -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vrsub.vi v16, v15, 3 -; CHECK-NEXT: addi a0, a0, -8 -; CHECK-NEXT: vmv1r.v v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, 
a0 -; CHECK-NEXT: vrgatherei16.vv v8, v12, v16, v0.t +; CHECK-NEXT: addi a0, a0, -4 +; CHECK-NEXT: vrsub.vx v12, v12, a1 +; CHECK-NEXT: vrgather.vv v15, v8, v12 +; CHECK-NEXT: vrgather.vv v17, v10, v12 +; CHECK-NEXT: vrgather.vv v14, v9, v12 +; CHECK-NEXT: vrgather.vv v16, v11, v12 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vslidedown.vx v12, v14, a0 +; CHECK-NEXT: vslidedown.vx v8, v16, a0 +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vslideup.vi v8, v12, 4 ; CHECK-NEXT: ret %res = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> ret <8 x double> %res From 1512332e0470f513de26a6f96fff27319717bbdd Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 5 Feb 2025 16:10:58 -0800 Subject: [PATCH 2/2] Address review comment --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 3cdfa918771c5..d2d79e5d831a6 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -5805,19 +5805,16 @@ bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef M, EVT VT) const { if (ShuffleVectorSDNode::isSplatMask(M.data(), VT)) return true; + const unsigned NumElts = M.size(); MVT SVT = VT.getSimpleVT(); // Not for i1 vectors. if (SVT.getScalarType() == MVT::i1) return false; - unsigned NumElts = M.size(); - if (ShuffleVectorInst::isReverseMask(M, NumElts) && - ShuffleVectorInst::isSingleSourceMask(M, NumElts)) - return true; - int Dummy1, Dummy2; - return (isElementRotate(Dummy1, Dummy2, M) > 0) || + return ShuffleVectorInst::isReverseMask(M, NumElts) || + (isElementRotate(Dummy1, Dummy2, M) > 0) || isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget); }
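
For reference, the shuffle shape this change improves, written as a standalone example. This is illustrative only; the function name is hypothetical and is not taken from the patch or its tests, which exercise the same pattern at many element types and widths.

; Illustrative example (not from the patch): a two-source reverse.
; The result is the element-wise reverse of the concatenation of %a and %b,
; so elements 0..3 are %b reversed and elements 4..7 are %a reversed.
define <8 x i32> @two_source_reverse_example(<4 x i32> %a, <4 x i32> %b) {
  %res = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %res
}

With single-source reverse masks reported legal by isShuffleMaskLegal, generic DAG combine can push the reverse into each operand (each reverse being roughly linear in LMUL) and then concatenate the halves with a vslideup, as seen in the updated CHECK lines above, rather than lowering the second operand through a full O(LMUL^2) vrgather.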