16 changes: 12 additions & 4 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1403,7 +1403,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     }
 
     setOperationAction({ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE,
-                        ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
+                        ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
+                        ISD::SCALAR_TO_VECTOR},
                        VT, Custom);
 
     setOperationAction(
@@ -6514,9 +6515,16 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     if (VT.isFixedLengthVector())
       ContainerVT = getContainerForFixedLengthVector(VT);
     SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
-    Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
-    SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
-                            DAG.getUNDEF(ContainerVT), Scalar, VL);
+
+    SDValue V;
+    if (VT.isFloatingPoint()) {
+      V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
+                      DAG.getUNDEF(ContainerVT), Scalar, VL);
+    } else {
+      Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
+      V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
+                      DAG.getUNDEF(ContainerVT), Scalar, VL);
+    }
     if (VT.isFixedLengthVector())
       V = convertFromScalableVector(VT, V, DAG, Subtarget);
     return V;
92 changes: 92 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-scalarized.ll
@@ -0,0 +1,92 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s

define <8 x float> @fpext_v8bf16(<8 x bfloat> %x) {
; CHECK-LABEL: fpext_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.w a0, fa0
; CHECK-NEXT: fmv.x.w a1, fa1
; CHECK-NEXT: fmv.x.w a2, fa2
; CHECK-NEXT: fmv.x.w a3, fa3
; CHECK-NEXT: fmv.x.w a4, fa4
; CHECK-NEXT: fmv.x.w a5, fa5
; CHECK-NEXT: fmv.x.w a6, fa6
; CHECK-NEXT: fmv.x.w a7, fa7
; CHECK-NEXT: slli a7, a7, 16
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v8, a7
; CHECK-NEXT: slli a6, a6, 16
; CHECK-NEXT: vmv.s.x v9, a6
; CHECK-NEXT: vslideup.vi v9, v8, 1
; CHECK-NEXT: slli a5, a5, 16
; CHECK-NEXT: vmv.s.x v8, a5
; CHECK-NEXT: slli a4, a4, 16
; CHECK-NEXT: vmv.s.x v10, a4
; CHECK-NEXT: vslideup.vi v10, v8, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vi v10, v9, 2
; CHECK-NEXT: slli a3, a3, 16
; CHECK-NEXT: vmv.s.x v8, a3
; CHECK-NEXT: slli a2, a2, 16
; CHECK-NEXT: vmv.s.x v9, a2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v9, v8, 1
; CHECK-NEXT: slli a1, a1, 16
; CHECK-NEXT: vmv.s.x v11, a1
; CHECK-NEXT: slli a0, a0, 16
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vslideup.vi v8, v11, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 2
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 4
; CHECK-NEXT: ret
%y = fpext <8 x bfloat> %x to <8 x float>
ret <8 x float> %y
}

define <8 x float> @fpext_v8f16(<8 x bfloat> %x) {
; CHECK-LABEL: fpext_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.w a0, fa0
; CHECK-NEXT: fmv.x.w a1, fa1
; CHECK-NEXT: fmv.x.w a2, fa2
; CHECK-NEXT: fmv.x.w a3, fa3
; CHECK-NEXT: fmv.x.w a4, fa4
; CHECK-NEXT: fmv.x.w a5, fa5
; CHECK-NEXT: fmv.x.w a6, fa6
; CHECK-NEXT: fmv.x.w a7, fa7
; CHECK-NEXT: slli a7, a7, 16
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v8, a7
; CHECK-NEXT: slli a6, a6, 16
; CHECK-NEXT: vmv.s.x v9, a6
; CHECK-NEXT: vslideup.vi v9, v8, 1
; CHECK-NEXT: slli a5, a5, 16
; CHECK-NEXT: vmv.s.x v8, a5
; CHECK-NEXT: slli a4, a4, 16
; CHECK-NEXT: vmv.s.x v10, a4
; CHECK-NEXT: vslideup.vi v10, v8, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vi v10, v9, 2
; CHECK-NEXT: slli a3, a3, 16
; CHECK-NEXT: vmv.s.x v8, a3
; CHECK-NEXT: slli a2, a2, 16
; CHECK-NEXT: vmv.s.x v9, a2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v9, v8, 1
; CHECK-NEXT: slli a1, a1, 16
; CHECK-NEXT: vmv.s.x v11, a1
; CHECK-NEXT: slli a0, a0, 16
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vslideup.vi v8, v11, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 2
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 4
; CHECK-NEXT: ret
%y = fpext <8 x bfloat> %x to <8 x float>
ret <8 x float> %y
}

@@ -412,30 +412,20 @@ declare <1 x half> @llvm.experimental.constrained.sitofp.v1f16.v1i7(<1 x i7>, me
define <1 x half> @vsitofp_v1i7_v1f16(<1 x i7> %va) strictfp {
; RV32-LABEL: vsitofp_v1i7_v1f16:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: slli a0, a0, 25
; RV32-NEXT: srai a0, a0, 25
; RV32-NEXT: fcvt.h.w fa5, a0
; RV32-NEXT: fsh fa5, 14(sp)
; RV32-NEXT: addi a0, sp, 14
; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV32-NEXT: vle16.v v8, (a0)
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vfmv.s.f v8, fa5
; RV32-NEXT: ret
;
; RV64-LABEL: vsitofp_v1i7_v1f16:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: slli a0, a0, 57
; RV64-NEXT: srai a0, a0, 57
; RV64-NEXT: fcvt.h.w fa5, a0
; RV64-NEXT: fsh fa5, 14(sp)
; RV64-NEXT: addi a0, sp, 14
; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV64-NEXT: vle16.v v8, (a0)
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vfmv.s.f v8, fa5
; RV64-NEXT: ret
%evec = call <1 x half> @llvm.experimental.constrained.sitofp.v1f16.v1i7(<1 x i7> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <1 x half> %evec
@@ -445,15 +435,10 @@ declare <1 x half> @llvm.experimental.constrained.uitofp.v1f16.v1i7(<1 x i7>, me
define <1 x half> @vuitofp_v1i7_v1f16(<1 x i7> %va) strictfp {
; CHECK-LABEL: vuitofp_v1i7_v1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: andi a0, a0, 127
; CHECK-NEXT: fcvt.h.wu fa5, a0
; CHECK-NEXT: fsh fa5, 14(sp)
; CHECK-NEXT: addi a0, sp, 14
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v8, fa5
; CHECK-NEXT: ret
%evec = call <1 x half> @llvm.experimental.constrained.uitofp.v1f16.v1i7(<1 x i7> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <1 x half> %evec
54 changes: 36 additions & 18 deletions llvm/test/CodeGen/RISCV/rvv/pr63596.ll
@@ -9,36 +9,54 @@ define <4 x float> @foo(ptr %0) nounwind {
; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: lhu s0, 0(a0)
; CHECK-NEXT: lhu s1, 2(a0)
; CHECK-NEXT: lhu s2, 4(a0)
; CHECK-NEXT: lhu a0, 6(a0)
; CHECK-NEXT: fmv.w.x fa0, a0
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: fsw fa0, 4(sp)
; CHECK-NEXT: fmv.w.x fa0, s2
; CHECK-NEXT: fmv.w.x fa5, s2
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v8, fa0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: fmv.s fa0, fa5
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: fsw fa0, 12(sp)
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vfmv.s.f v8, fa0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: fmv.w.x fa0, s1
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: fsw fa0, 8(sp)
; CHECK-NEXT: fmv.w.x fa0, s0
; CHECK-NEXT: fmv.w.x fa5, s0
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v8, fa0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: fmv.s fa0, fa5
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: fsw fa0, 0(sp)
; CHECK-NEXT: addi a0, sp, 4
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: addi a0, sp, 12
; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: addi a0, sp, 8
; CHECK-NEXT: vle32.v v11, (a0)
; CHECK-NEXT: mv a0, sp
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v10, v9, 1
; CHECK-NEXT: vslideup.vi v8, v11, 1
; CHECK-NEXT: vfmv.s.f v8, fa0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
Contributor Author:

We're now spilling and reloading the vector due to the ABI but at least it gets rid of the one element unit strided loads.

; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vslideup.vi v8, v9, 2
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
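The inline review comment above refers to the vs1r.v/vl1r.v pair around the __extendhfsf2 libcalls: under the standard RISC-V calling convention no vector register is preserved across a call, so the partially assembled vector has to be spilled and reloaded, but the old pattern of fsw stores to the stack followed by one-element vle32.v loads is gone. For orientation, a minimal sketch of the kind of IR pr63596.ll exercises follows; it is an assumed reconstruction (the test body itself is not part of this diff), with %p and the alignment chosen for illustration.

; Assumed illustrative IR, not the verbatim pr63596.ll body. Without +zvfh,
; each f16 element is extended to f32 through the __extendhfsf2 libcall; with
; this patch the scalar results are inserted with vfmv.s.f and combined with
; vslideup instead of being stored to the stack and reloaded element by element.
define <4 x float> @foo(ptr %p) nounwind {
  %v = load <4 x half>, ptr %p, align 2
  %e = fpext <4 x half> %v to <4 x float>
  ret <4 x float> %e
}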