diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c53550ea3b23b..045b346abf341 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -18798,6 +18798,10 @@ static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
   if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
     return SDValue();
 
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isTypeLegal(SrcMVT))
+    return SDValue();
+
   // Check that destination type is large enough to hold result without
   // overflow.
   if (Opc == ISD::VECREDUCE_ADD) {
@@ -18814,9 +18818,6 @@ static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
 
   MVT ContainerVT = SrcMVT;
   if (SrcMVT.isFixedLengthVector()) {
-    if (!useRVVForFixedLengthVectorVT(SrcMVT, Subtarget))
-      return SDValue();
-
     ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
   }
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
index ac1d63311fd1e..88894f887cc20 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
@@ -1,13 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zbb | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zbb | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zbb | FileCheck %s --check-prefixes=CHECK,V
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zbb | FileCheck %s --check-prefixes=CHECK,V
+; RUN: llc < %s -mtriple=riscv64 -mattr=+zve32x,+zvl128b,+zbb | FileCheck %s --check-prefixes=CHECK,ZVE
 
 define i32 @test_v2i1(<2 x i1> %x) {
-; CHECK-LABEL: test_v2i1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vcpop.m a0, v0
-; CHECK-NEXT:    ret
+; V-LABEL: test_v2i1:
+; V:       # %bb.0:
+; V-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; V-NEXT:    vcpop.m a0, v0
+; V-NEXT:    ret
+;
+; ZVE-LABEL: test_v2i1:
+; ZVE:       # %bb.0:
+; ZVE-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; ZVE-NEXT:    vcpop.m a0, v0
+; ZVE-NEXT:    ret
   %a = zext <2 x i1> %x to <2 x i32>
   %b = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a)
   ret i32 %b
@@ -173,6 +180,35 @@ define i32 @test_v256i1(<256 x i1> %x) {
   ret i32 %b
 }
 
+; FIXME: Optimize this case with Zve32x. We have to use mf4 and set the VL to
+; VLEN/64.
+define i32 @test_nxv1i1(<vscale x 1 x i1> %x) {
+; V-LABEL: test_nxv1i1:
+; V:       # %bb.0: # %entry
+; V-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; V-NEXT:    vcpop.m a0, v0
+; V-NEXT:    ret
+;
+; ZVE-LABEL: test_nxv1i1:
+; ZVE:       # %bb.0: # %entry
+; ZVE-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; ZVE-NEXT:    vmv.v.i v8, 0
+; ZVE-NEXT:    csrr a0, vlenb
+; ZVE-NEXT:    srli a0, a0, 3
+; ZVE-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; ZVE-NEXT:    vmerge.vim v8, v8, 1, v0
+; ZVE-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; ZVE-NEXT:    vmv.s.x v9, zero
+; ZVE-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; ZVE-NEXT:    vredsum.vs v9, v8, v9
+; ZVE-NEXT:    vmv.x.s a0, v9
+; ZVE-NEXT:    ret
+entry:
+  %a = zext <vscale x 1 x i1> %x to <vscale x 1 x i32>
+  %b = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %a)
+  ret i32 %b
+}
+
 define i32 @test_nxv2i1(<vscale x 2 x i1> %x) {
 ; CHECK-LABEL: test_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
@@ -520,7 +556,3 @@ entry:
   %b = call i16 @llvm.vector.reduce.add.nxv64i16(<vscale x 64 x i16> %a)
   ret i16 %b
 }
-
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; RV32: {{.*}}
-; RV64: {{.*}}