diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1495d38dda7ea..90376b375e275 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -756,8 +756,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         ISD::VP_FROUNDEVEN,  ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO,
         ISD::VP_FRINT,       ISD::VP_FNEARBYINT,  ISD::VP_IS_FPCLASS,
         ISD::VP_FMINIMUM,    ISD::VP_FMAXIMUM,    ISD::VP_LRINT,
-        ISD::VP_LLRINT,      ISD::EXPERIMENTAL_VP_REVERSE,
-        ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
+        ISD::VP_LLRINT,      ISD::VP_REDUCE_FMINIMUM,
         ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
 
     static const unsigned IntegerVecReduceOps[] = {
@@ -1112,6 +1111,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
 
       setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);
+      setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
+      setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
 
       setOperationAction(FloatingPointVPOps, VT, Custom);
 
@@ -1420,6 +1421,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                             ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_REVERSE,
                             ISD::VECTOR_SHUFFLE, ISD::VECTOR_COMPRESS},
                            VT, Custom);
+        setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
+        setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
 
         setOperationAction({ISD::VECTOR_INTERLEAVE, ISD::VECTOR_DEINTERLEAVE},
                            VT, Custom);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverser-float.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverse-float.ll
similarity index 71%
rename from llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverser-float.ll
rename to llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverse-float.ll
index 1d21cb5586984..ad84aaccc2171 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverser-float.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverse-float.ll
@@ -1,5 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v,+zvfh -verify-machineinstrs -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin -verify-machineinstrs -riscv-v-vector-bits-min=128 \
+; RUN:   < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs -riscv-v-vector-bits-min=128 \
 ; RUN:   < %s | FileCheck %s
 
 define <2 x double> @test_vp_reverse_v2f64_masked(<2 x double> %src, <2 x i1> %mask, i32 zeroext %evl) {
@@ -88,3 +90,32 @@ define <4 x half> @test_vp_reverse_v4f16(<4 x half> %src, i32 zeroext %evl) {
   %dst = call <4 x half> @llvm.experimental.vp.reverse.v4f16(<4 x half> %src, <4 x i1> splat (i1 1), i32 %evl)
   ret <4 x half> %dst
 }
+
+define <4 x bfloat> @test_vp_reverse_v4bf16_masked(<4 x bfloat> %src, <4 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v4bf16_masked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vid.v v9, v0.t
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %dst = call <4 x bfloat> @llvm.experimental.vp.reverse.v4bf16(<4 x bfloat> %src, <4 x i1> %mask, i32 %evl)
+  ret <4 x bfloat> %dst
+}
+
+define <4 x bfloat> @test_vp_reverse_v4bf16(<4 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a1
+; CHECK-NEXT:    vrgather.vv v9, v8, v10
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+
+  %dst = call <4 x bfloat> @llvm.experimental.vp.reverse.v4bf16(<4 x bfloat> %src, <4 x i1> splat (i1 1), i32 %evl)
+  ret <4 x bfloat> %dst
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverser-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverse-int.ll
similarity index 100%
rename from llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverser-int.ll
rename to llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverse-int.ll
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll
index d0562e2be346f..8160e62a43106 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfh -verify-machineinstrs -riscv-v-vector-bits-min=128 \
-; RUN:   < %s | FileCheck %s
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfh,+zvfbfmin -verify-machineinstrs -riscv-v-vector-bits-min=128 \
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs -riscv-v-vector-bits-min=128 \
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 
 define <2 x i64> @test_vp_splice_v2i64(<2 x i64> %va, <2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
 ; CHECK-LABEL: test_vp_splice_v2i64:
@@ -338,13 +340,64 @@ define <4 x float> @test_vp_splice_nxv2f32_with_firstelt(float %first, <4 x floa
 }
 
 define <4 x half> @test_vp_splice_nxv2f16_with_firstelt(half %first, <4 x half> %vb, <4 x i1> %mask, i32 zeroext %evl) {
-; CHECK-LABEL: test_vp_splice_nxv2f16_with_firstelt:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: test_vp_splice_nxv2f16_with_firstelt:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfslide1up.vf v9, v8, fa0, v0.t
+; ZVFH-NEXT:    vmv1r.v v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: test_vp_splice_nxv2f16_with_firstelt:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; ZVFHMIN-NEXT:    vslideup.vi v9, v8, 1, v0.t
+; ZVFHMIN-NEXT:    vmv1r.v v8, v9
+; ZVFHMIN-NEXT:    ret
   %va = insertelement <4 x half> poison, half %first, i32 0
   %v = call <4 x half> @llvm.experimental.vp.splice.nxv2f16(<4 x half> %va, <4 x half> %vb, i32 0, <4 x i1> %mask, i32 1, i32 %evl)
   ret <4 x half> %v
 }
+
+define <8 x bfloat> @test_vp_splice_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, -5
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 5
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
+; CHECK-NEXT:    ret
+
+  %v = call <8 x bfloat> @llvm.experimental.vp.splice.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, i32 5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb)
+  ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @test_vp_splice_v8bf16_negative_offset(<8 x bfloat> %va, <8 x bfloat> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v8bf16_negative_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, -5
+; CHECK-NEXT:    vsetivli zero, 5, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 5
+; CHECK-NEXT:    ret
+
+  %v = call <8 x bfloat> @llvm.experimental.vp.splice.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, i32 -5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb)
+  ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @test_vp_splice_v8bf16_masked(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v8bf16_masked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, -5
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 5, v0.t
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vslideup.vx v8, v9, a0, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x bfloat> @llvm.experimental.vp.splice.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, i32 5, <8 x i1> %mask, i32 %evla, i32 %evlb)
+  ret <8 x bfloat> %v
+}
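
For reference, a minimal standalone .ll sketch (not part of the patch; the file name and function names are hypothetical) exercising the two intrinsics this change marks Custom for bf16 fixed vectors, under the same llc configuration as the RUN lines above:

; bf16-vp-sketch.ll -- run with:
;   llc -mtriple=riscv64 -mattr=+f,+d,+v,+zvfhmin,+zvfbfmin \
;       -verify-machineinstrs -riscv-v-vector-bits-min=128 < bf16-vp-sketch.ll

; Reverses the first %evl lanes of %src under an all-ones mask.
define <4 x bfloat> @reverse_bf16_sketch(<4 x bfloat> %src, i32 zeroext %evl) {
  %dst = call <4 x bfloat> @llvm.experimental.vp.reverse.v4bf16(<4 x bfloat> %src, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x bfloat> %dst
}

; Splices the first %evla lanes of %va with the first %evlb lanes of %vb
; at offset 5, again under an all-ones mask.
define <8 x bfloat> @splice_bf16_sketch(<8 x bfloat> %va, <8 x bfloat> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
  %v = call <8 x bfloat> @llvm.experimental.vp.splice.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, i32 5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb)
  ret <8 x bfloat> %v
}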