diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index af7a39b2580a3..dbfca5c4a8b34 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1403,7 +1403,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       }
 
       setOperationAction({ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE,
-                          ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
+                          ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
+                          ISD::SCALAR_TO_VECTOR},
                          VT, Custom);
 
       setOperationAction(
@@ -6514,9 +6515,16 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     if (VT.isFixedLengthVector())
       ContainerVT = getContainerForFixedLengthVector(VT);
     SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
-    Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
-    SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
-                            DAG.getUNDEF(ContainerVT), Scalar, VL);
+
+    SDValue V;
+    if (VT.isFloatingPoint()) {
+      V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
+                      DAG.getUNDEF(ContainerVT), Scalar, VL);
+    } else {
+      Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
+      V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
+                      DAG.getUNDEF(ContainerVT), Scalar, VL);
+    }
     if (VT.isFixedLengthVector())
       V = convertFromScalableVector(VT, V, DAG, Subtarget);
     return V;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-scalarized.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-scalarized.ll
new file mode 100644
index 0000000000000..4621f339ca882
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-scalarized.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
+
+define <8 x float> @fpext_v8bf16(<8 x bfloat> %x) {
+; CHECK-LABEL: fpext_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.w a0, fa0
+; CHECK-NEXT:    fmv.x.w a1, fa1
+; CHECK-NEXT:    fmv.x.w a2, fa2
+; CHECK-NEXT:    fmv.x.w a3, fa3
+; CHECK-NEXT:    fmv.x.w a4, fa4
+; CHECK-NEXT:    fmv.x.w a5, fa5
+; CHECK-NEXT:    fmv.x.w a6, fa6
+; CHECK-NEXT:    fmv.x.w a7, fa7
+; CHECK-NEXT:    slli a7, a7, 16
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, a7
+; CHECK-NEXT:    slli a6, a6, 16
+; CHECK-NEXT:    vmv.s.x v9, a6
+; CHECK-NEXT:    vslideup.vi v9, v8, 1
+; CHECK-NEXT:    slli a5, a5, 16
+; CHECK-NEXT:    vmv.s.x v8, a5
+; CHECK-NEXT:    slli a4, a4, 16
+; CHECK-NEXT:    vmv.s.x v10, a4
+; CHECK-NEXT:    vslideup.vi v10, v8, 1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v10, v9, 2
+; CHECK-NEXT:    slli a3, a3, 16
+; CHECK-NEXT:    vmv.s.x v8, a3
+; CHECK-NEXT:    slli a2, a2, 16
+; CHECK-NEXT:    vmv.s.x v9, a2
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vslideup.vi v9, v8, 1
+; CHECK-NEXT:    slli a1, a1, 16
+; CHECK-NEXT:    vmv.s.x v11, a1
+; CHECK-NEXT:    slli a0, a0, 16
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    vslideup.vi v8, v11, 1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 2
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v10, 4
+; CHECK-NEXT:    ret
+  %y = fpext <8 x bfloat> %x to <8 x float>
+  ret <8 x float> %y
+}
+
+define <8 x float> @fpext_v8f16(<8 x bfloat> %x) {
+; CHECK-LABEL: fpext_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.w a0, fa0
+; CHECK-NEXT:    fmv.x.w a1, fa1
+; CHECK-NEXT:    fmv.x.w a2, fa2
+; CHECK-NEXT:    fmv.x.w a3, fa3
+; CHECK-NEXT:    fmv.x.w a4, fa4
+; CHECK-NEXT:    fmv.x.w a5, fa5
+; CHECK-NEXT:    fmv.x.w a6, fa6
+; CHECK-NEXT:    fmv.x.w a7, fa7
+; CHECK-NEXT:    slli a7, a7, 16
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, a7
+; CHECK-NEXT:    slli a6, a6, 16
+; CHECK-NEXT:    vmv.s.x v9, a6
+; CHECK-NEXT:    vslideup.vi v9, v8, 1
+; CHECK-NEXT:    slli a5, a5, 16
+; CHECK-NEXT:    vmv.s.x v8, a5
+; CHECK-NEXT:    slli a4, a4, 16
+; CHECK-NEXT:    vmv.s.x v10, a4
+; CHECK-NEXT:    vslideup.vi v10, v8, 1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v10, v9, 2
+; CHECK-NEXT:    slli a3, a3, 16
+; CHECK-NEXT:    vmv.s.x v8, a3
+; CHECK-NEXT:    slli a2, a2, 16
+; CHECK-NEXT:    vmv.s.x v9, a2
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vslideup.vi v9, v8, 1
+; CHECK-NEXT:    slli a1, a1, 16
+; CHECK-NEXT:    vmv.s.x v11, a1
+; CHECK-NEXT:    slli a0, a0, 16
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    vslideup.vi v8, v11, 1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 2
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v10, 4
+; CHECK-NEXT:    ret
+  %y = fpext <8 x bfloat> %x to <8 x float>
+  ret <8 x float> %y
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vitofp-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vitofp-constrained-sdnode.ll
index 3dec7daf66ac9..5eb54fc7e299a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vitofp-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vitofp-constrained-sdnode.ll
@@ -412,30 +412,20 @@ declare <1 x half> @llvm.experimental.constrained.sitofp.v1f16.v1i7(<1 x i7>, me
 define <1 x half> @vsitofp_v1i7_v1f16(<1 x i7> %va) strictfp {
 ; RV32-LABEL: vsitofp_v1i7_v1f16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    slli a0, a0, 25
 ; RV32-NEXT:    srai a0, a0, 25
 ; RV32-NEXT:    fcvt.h.w fa5, a0
-; RV32-NEXT:    fsh fa5, 14(sp)
-; RV32-NEXT:    addi a0, sp, 14
-; RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vfmv.s.f v8, fa5
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vsitofp_v1i7_v1f16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    slli a0, a0, 57
 ; RV64-NEXT:    srai a0, a0, 57
 ; RV64-NEXT:    fcvt.h.w fa5, a0
-; RV64-NEXT:    fsh fa5, 14(sp)
-; RV64-NEXT:    addi a0, sp, 14
-; RV64-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vfmv.s.f v8, fa5
 ; RV64-NEXT:    ret
   %evec = call <1 x half> @llvm.experimental.constrained.sitofp.v1f16.v1i7(<1 x i7> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <1 x half> %evec
@@ -445,15 +435,10 @@ declare <1 x half> @llvm.experimental.constrained.uitofp.v1f16.v1i7(<1 x i7>, me
 define <1 x half> @vuitofp_v1i7_v1f16(<1 x i7> %va) strictfp {
 ; CHECK-LABEL: vuitofp_v1i7_v1f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    andi a0, a0, 127
 ; CHECK-NEXT:    fcvt.h.wu fa5, a0
-; CHECK-NEXT:    fsh fa5, 14(sp)
-; CHECK-NEXT:    addi a0, sp, 14
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT:    vfmv.s.f v8, fa5
 ; CHECK-NEXT:    ret
   %evec = call <1 x half> @llvm.experimental.constrained.uitofp.v1f16.v1i7(<1 x i7> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <1 x half> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr63596.ll b/llvm/test/CodeGen/RISCV/rvv/pr63596.ll
index 8bb62eaa8e9e9..dbd4224c7ef08 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr63596.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr63596.ll
@@ -9,36 +9,54 @@ define <4 x float> @foo(ptr %0) nounwind {
 ; CHECK-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    sub sp, sp, a1
 ; CHECK-NEXT:    lhu s0, 0(a0)
 ; CHECK-NEXT:    lhu s1, 2(a0)
 ; CHECK-NEXT:    lhu s2, 4(a0)
 ; CHECK-NEXT:    lhu a0, 6(a0)
 ; CHECK-NEXT:    fmv.w.x fa0, a0
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    fsw fa0, 4(sp)
-; CHECK-NEXT:    fmv.w.x fa0, s2
+; CHECK-NEXT:    fmv.w.x fa5, s2
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    fmv.s fa0, fa5
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    fsw fa0, 12(sp)
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vslideup.vi v8, v9, 1
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    fmv.w.x fa0, s1
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    fsw fa0, 8(sp)
-; CHECK-NEXT:    fmv.w.x fa0, s0
+; CHECK-NEXT:    fmv.w.x fa5, s0
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    fmv.s fa0, fa5
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    fsw fa0, 0(sp)
-; CHECK-NEXT:    addi a0, sp, 4
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    addi a0, sp, 12
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    addi a0, sp, 8
-; CHECK-NEXT:    vle32.v v11, (a0)
-; CHECK-NEXT:    mv a0, sp
-; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vi v10, v9, 1
-; CHECK-NEXT:    vslideup.vi v8, v11, 1
+; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vslideup.vi v8, v9, 1
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vslideup.vi v8, v10, 2
+; CHECK-NEXT:    vslideup.vi v8, v9, 2
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload