From 946ac97e610b659638b57092b592bc15095021f4 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 1 Nov 2024 11:04:57 -0700 Subject: [PATCH 1/3] [RISCV] Allow undef elements in isDeinterleaveShuffle This allows us to form vnsrl deinterleaves from non-power-of-two shuffles after they've been legalized to a power of two. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 6 +-- .../rvv/fixed-vectors-interleaved-access.ll | 40 ++++--------------- .../RISCV/rvv/vector-deinterleave-fixed.ll | 3 -- 3 files changed, 10 insertions(+), 39 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 920b06c7ba6ec..047f74f3e6db9 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4475,10 +4475,10 @@ static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, if (Mask[0] != 0 && Mask[0] != 1) return false; - // The others must increase by 2 each time. - // TODO: Support undef elements? + // The others must increase by 2 each time (or be undef). for (unsigned i = 1; i != Mask.size(); ++i) - if (Mask[i] != Mask[i - 1] + 2) + if (Mask[i] != Mask[i - 1] + 2 && + Mask[i] != -1) return false; return true; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index 5911e8248f299..b3cc834c70a1c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -12,46 +12,20 @@ define {<3 x i32>, <3 x i32>} @load_factor2_v3(ptr %ptr) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v10, (a0) -; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v10, 2 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vwaddu.vv v8, v10, v9 -; RV32-NEXT: li a0, -1 -; RV32-NEXT: vwmaccu.vx v8, a0, v9 -; RV32-NEXT: vmv.v.i v0, 4 -; RV32-NEXT: vsetivli zero, 4, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v12, v10, 4 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV32-NEXT: vrgather.vi v8, v12, 0, v0.t -; RV32-NEXT: vid.v v9 -; RV32-NEXT: vadd.vv v9, v9, v9 -; RV32-NEXT: vadd.vi v11, v9, 1 -; RV32-NEXT: vrgather.vv v9, v10, v11 -; RV32-NEXT: vrgather.vi v9, v12, 1, v0.t +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vnsrl.wi v8, v10, 0 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vnsrl.wx v9, v10, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: load_factor2_v3: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v10, (a0) +; RV64-NEXT: li a0, 32 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vid.v v8 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vadd.vi v8, v8, 1 -; RV64-NEXT: vrgather.vv v9, v10, v8 -; RV64-NEXT: vmv.v.i v0, 4 -; RV64-NEXT: vsetivli zero, 4, e32, m2, ta, ma -; RV64-NEXT: vslidedown.vi v12, v10, 4 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV64-NEXT: vrgather.vi v9, v12, 1, v0.t -; RV64-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v11, v10, 2 -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vwaddu.vv v8, v10, v11 -; RV64-NEXT: li a0, -1 -; RV64-NEXT: vwmaccu.vx v8, a0, v11 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV64-NEXT: vrgather.vi v8, v12, 0, v0.t +; RV64-NEXT: vnsrl.wx v9, v10, a0 +; RV64-NEXT: vnsrl.wi v8, v10, 0 ; RV64-NEXT: ret %interleaved.vec = load <6 x i32>, ptr %ptr %v0 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll index bc32518b67195..b2973826d65de 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll @@ -269,6 +269,3 @@ declare {<2 x float>, <2 x float>} @llvm.vector.deinterleave2.v4f32(<4 x float>) declare {<8 x half>, <8 x half>} @llvm.vector.deinterleave2.v16f16(<16 x half>) declare {<4 x float>, <4 x float>} @llvm.vector.deinterleave2.v8f32(<8 x float>) declare {<2 x double>, <2 x double>} @llvm.vector.deinterleave2.v4f64(<4 x double>) -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; RV32: {{.*}} -; RV64: {{.*}} From b0ee8ad3e570ff699ba6a7b39f0a333121bc971d Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 1 Nov 2024 11:19:18 -0700 Subject: [PATCH 2/3] Clang-format --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 047f74f3e6db9..aab6d384062af 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4477,8 +4477,7 @@ static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, // The others must increase by 2 each time (or be undef). for (unsigned i = 1; i != Mask.size(); ++i) - if (Mask[i] != Mask[i - 1] + 2 && - Mask[i] != -1) + if (Mask[i] != Mask[i - 1] + 2 && Mask[i] != -1) return false; return true; From 1d4695f9b4ff8b69eff6903dd4368be0fa7de38b Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 4 Nov 2024 07:27:03 -0800 Subject: [PATCH 3/3] Fix bug in prior version and add additional tests --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +- .../rvv/fixed-vectors-shufflevector-vnsrl.ll | 91 +++++++++++++++++++ 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index aab6d384062af..f0e559063ae6e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4477,7 +4477,7 @@ static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, // The others must increase by 2 each time (or be undef). for (unsigned i = 1; i != Mask.size(); ++i) - if (Mask[i] != Mask[i - 1] + 2 && Mask[i] != -1) + if (Mask[i] != -1 && Mask[i] != Mask[0] + (int)i * 2) return false; return true; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll index a3e50685889d1..f94494bbedde9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll @@ -347,3 +347,94 @@ entry: store <2 x double> %shuffle.i5, ptr %out, align 8 ret void } + +define void @vnsrl_0_i8_undef(ptr %in, ptr %out) { +; CHECK-LABEL: vnsrl_0_i8_undef: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +define void @vnsrl_0_i8_undef2(ptr %in, ptr %out) { +; CHECK-LABEL: vnsrl_0_i8_undef2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +; TODO: Allow an undef initial element +define void @vnsrl_0_i8_undef3(ptr %in, ptr %out) { +; CHECK-LABEL: vnsrl_0_i8_undef3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: lui a0, 24640 +; CHECK-NEXT: addi a0, a0, 6 +; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vrgather.vv v10, v8, v9 +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vadd.vv v9, v9, v9 +; CHECK-NEXT: vadd.vi v9, v9, -8 +; CHECK-NEXT: li a0, -32 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v9, v0.t +; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +} + +; Not a vnsrl (checking for a prior pattern matching bug) +define void @vnsrl_0_i8_undef_negative(ptr %in, ptr %out) { +; CHECK-LABEL: vnsrl_0_i8_undef_negative: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI17_0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vrgather.vv v10, v8, v9 +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vadd.vv v9, v9, v9 +; CHECK-NEXT: vadd.vi v9, v9, -8 +; CHECK-NEXT: li a0, 48 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v9, v0.t +; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: ret +entry: + %0 = load <16 x i8>, ptr %in, align 1 + %shuffle.i5 = shufflevector <16 x i8> %0, <16 x i8> poison, <8 x i32> + store <8 x i8> %shuffle.i5, ptr %out, align 1 + ret void +}