From 63a7d9351c9a57adaae2b9c89de31569fe2d171f Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 13 Jan 2025 12:10:56 -0800
Subject: [PATCH] [LegalizeVectorOps][RISCV] Use VP_FP_EXTEND/ROUND when
 promoting VP_FP* operations.

This preserves the original VL, leading to more reuse of VL for vsetvli.
The VLOptimizer can also clean up a lot of this, but I'm not sure it
catches all of it. There are some regressions in here from also
propagating the mask, but I'm not sure whether that's a concern.
---
 .../SelectionDAG/LegalizeVectorOps.cpp        |   23 +-
 llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll        |  544 +-
 .../RISCV/rvv/fixed-vectors-ceil-vp.ll        |  136 +-
 .../RISCV/rvv/fixed-vectors-floor-vp.ll       |  136 +-
 .../RISCV/rvv/fixed-vectors-fmaximum-vp.ll    |  136 +-
 .../RISCV/rvv/fixed-vectors-fminimum-vp.ll    |  136 +-
 .../RISCV/rvv/fixed-vectors-round-vp.ll       |  136 +-
 .../RISCV/rvv/fixed-vectors-roundeven-vp.ll   |  136 +-
 .../RISCV/rvv/fixed-vectors-roundtozero-vp.ll |  136 +-
 .../RISCV/rvv/fixed-vectors-vfadd-vp.ll       |  148 +-
 .../RISCV/rvv/fixed-vectors-vfdiv-vp.ll       |  148 +-
 .../RISCV/rvv/fixed-vectors-vfma-vp.ll        |  168 +-
 .../RISCV/rvv/fixed-vectors-vfmax-vp.ll       |   72 +-
 .../RISCV/rvv/fixed-vectors-vfmin-vp.ll       |   72 +-
 .../RISCV/rvv/fixed-vectors-vfmul-vp.ll       |  148 +-
 .../RISCV/rvv/fixed-vectors-vfsqrt-vp.ll      |   64 +-
 .../RISCV/rvv/fixed-vectors-vfsub-vp.ll       |  148 +-
 llvm/test/CodeGen/RISCV/rvv/floor-vp.ll       |  544 +-
 llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll    |  808 +-
 llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll    |  808 +-
 llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll   |  560 +-
 llvm/test/CodeGen/RISCV/rvv/rint-vp.ll        |  542 +-
 llvm/test/CodeGen/RISCV/rvv/round-vp.ll       |  544 +-
 llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll   |  544 +-
 llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll |  544 +-
 llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll       |  612 +-
 llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll       |  592 +-
 llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll        | 9950 ++++++++---------
 llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll       |  252 +-
 llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll       |  252 +-
 llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll       |  296 +-
 llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll      |  140 +-
 llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll       |  592 +-
 33 files changed, 10284 insertions(+), 9783 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 89a00c5a4f043..a6d1b1cb7b104 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -737,7 +737,17 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
               .getVectorElementType()
               .isFloatingPoint() &&
           NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
-        Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
+        if (ISD::isVPOpcode(Node->getOpcode())) {
+          unsigned EVLIdx =
+              *ISD::getVPExplicitVectorLengthIdx(Node->getOpcode());
+          unsigned MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode());
+          Operands[j] =
+              DAG.getNode(ISD::VP_FP_EXTEND, dl, NVT, Node->getOperand(j),
+                          Node->getOperand(MaskIdx), Node->getOperand(EVLIdx));
+        } else {
+          Operands[j] =
+              DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
+        }
       else
         Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
     else
@@ -750,8 +760,15 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
   if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
       (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
        NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
-    Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
-                      DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
+    if (ISD::isVPOpcode(Node->getOpcode())) {
+      unsigned EVLIdx = *ISD::getVPExplicitVectorLengthIdx(Node->getOpcode());
+      unsigned MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode());
+      Res = DAG.getNode(ISD::VP_FP_ROUND, dl, VT, Res,
+                        Node->getOperand(MaskIdx), Node->getOperand(EVLIdx));
+    } else {
+      Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
+                        DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
+    }
   else
     Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
index a81f0ac982f56..1b9c78a20ec3b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
@@ -17,23 +17,27 @@ declare <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat>, <vs
 define <vscale x 1 x bfloat> @vp_ceil_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    lui a1, 307200
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfabs.v v8, v9, v0.t
-; CHECK-NEXT:    fmv.w.x fa5, a1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
+; CHECK-NEXT:    lui a0, 307200
+; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vfabs.v v11, v10, v0.t
+; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v8, v11, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 3
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v11, v10, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v11, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -42,12 +46,12 @@ define <vscale x 1 x bfloat> @vp_ceil_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <v
 define <vscale x 1 x bfloat> @vp_ceil_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv1bf16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    lui a1, 307200
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    lui a0, 307200
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vfabs.v v8, v9
-; CHECK-NEXT:    fmv.w.x fa5, a1
+; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v8, fa5
 ; CHECK-NEXT:    fsrmi a0, 3
 ; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
@@ -55,7 +59,7 @@ define <vscale x 1 x bfloat> @vp_ceil_vv_nxv1bf16_unmasked(<vscale x 1 x bfloa
 ; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
@@ -67,23 +71,27 @@ declare <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat>, <vs
 define <vscale x 2 x bfloat> @vp_ceil_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ceil_vv_nxv2bf16:
 ;
CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfabs.v v8, v9, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v11, v10, v0.t +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv2bf16( %va, %m, i32 %evl) ret %v @@ -92,12 +100,12 @@ define @vp_ceil_vv_nxv2bf16( %va, @vp_ceil_vv_nxv2bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv2bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -105,7 +113,7 @@ define @vp_ceil_vv_nxv2bf16_unmasked( ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv2bf16( %va, splat (i1 true), i32 %evl) @@ -117,25 +125,27 @@ declare @llvm.vp.ceil.nxv4bf16(, @vp_ceil_vv_nxv4bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli 
zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv4bf16( %va, %m, i32 %evl) ret %v @@ -144,12 +154,12 @@ define @vp_ceil_vv_nxv4bf16( %va, @vp_ceil_vv_nxv4bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t @@ -157,7 +167,7 @@ define @vp_ceil_vv_nxv4bf16_unmasked( ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv4bf16( %va, splat (i1 true), i32 %evl) @@ -169,25 +179,27 @@ declare @llvm.vp.ceil.nxv8bf16(, @vp_ceil_vv_nxv8bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v12, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv8bf16( %va, %m, i32 %evl) ret %v @@ -196,12 +208,12 @@ define @vp_ceil_vv_nxv8bf16( %va, @vp_ceil_vv_nxv8bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t @@ -209,7 
+221,7 @@ define @vp_ceil_vv_nxv8bf16_unmasked( ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv8bf16( %va, splat (i1 true), i32 %evl) @@ -221,25 +233,27 @@ declare @llvm.vp.ceil.nxv16bf16(, < define @vp_ceil_vv_nxv16bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv16bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv16bf16( %va, %m, i32 %evl) ret %v @@ -248,12 +262,12 @@ define @vp_ceil_vv_nxv16bf16( %va, define @vp_ceil_vv_nxv16bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv16bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t @@ -261,7 +275,7 @@ define @vp_ceil_vv_nxv16bf16_unmasked( @llvm.vp.ceil.nxv16bf16( %va, splat (i1 true), i32 %evl) @@ -279,59 +293,64 @@ define @vp_ceil_vv_nxv32bf16( %va, ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 ; CHECK-NEXT: lui a3, 307200 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 ; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v12, v0, a2 +; CHECK-NEXT: vslidedown.vx v17, v0, a2 ; CHECK-NEXT: sltu a2, a0, a3 +; CHECK-NEXT: vmv1r.v v18, v17 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, a2, e32, 
m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v18, v8, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 3 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmv1r.v v0, v18 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB10_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB10_2: -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t +; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 @@ -346,42 +365,55 @@ define @vp_ceil_vv_nxv32bf16( %va, define @vp_ceil_vv_nxv32bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv32bf16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vmset.m v24 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: lui a3, 307200 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 ; CHECK-NEXT: sub a3, a0, a1 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, 
ma -; CHECK-NEXT: vslidedown.vx v12, v24, a2 +; CHECK-NEXT: vslidedown.vx v16, v16, a2 ; CHECK-NEXT: sltu a2, a0, a3 +; CHECK-NEXT: vmv1r.v v17, v16 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 3 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB11_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB11_2: -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a0, 3 @@ -390,8 +422,14 @@ define @vp_ceil_vv_nxv32bf16_unmasked( @llvm.vp.ceil.nxv32bf16( %va, splat (i1 true), i32 %evl) ret %v @@ -418,23 +456,27 @@ define @vp_ceil_vv_nxv1f16( %va, @llvm.vp.ceil.nxv1f16( %va, %m, i32 %evl) ret %v @@ -458,12 +500,12 @@ define @vp_ceil_vv_nxv1f16_unmasked( %va, ; ; ZVFHMIN-LABEL: vp_ceil_vv_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -471,7 +513,7 @@ define @vp_ceil_vv_nxv1f16_unmasked( %va, ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.ceil.nxv1f16( %va, splat (i1 true), i32 %evl) @@ -500,23 +542,27 @@ define @vp_ceil_vv_nxv2f16( %va, @llvm.vp.ceil.nxv2f16( %va, %m, i32 %evl) ret %v @@ -540,12 +586,12 @@ define @vp_ceil_vv_nxv2f16_unmasked( %va, ; ; ZVFHMIN-LABEL: vp_ceil_vv_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, 
ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -553,7 +599,7 @@ define @vp_ceil_vv_nxv2f16_unmasked( %va, ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.ceil.nxv2f16( %va, splat (i1 true), i32 %evl) @@ -582,25 +628,27 @@ define @vp_ceil_vv_nxv4f16( %va, @llvm.vp.ceil.nxv4f16( %va, %m, i32 %evl) ret %v @@ -624,12 +672,12 @@ define @vp_ceil_vv_nxv4f16_unmasked( %va, ; ; ZVFHMIN-LABEL: vp_ceil_vv_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t @@ -637,7 +685,7 @@ define @vp_ceil_vv_nxv4f16_unmasked( %va, ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.ceil.nxv4f16( %va, splat (i1 true), i32 %evl) @@ -668,25 +716,27 @@ define @vp_ceil_vv_nxv8f16( %va, @llvm.vp.ceil.nxv8f16( %va, %m, i32 %evl) ret %v @@ -710,12 +760,12 @@ define @vp_ceil_vv_nxv8f16_unmasked( %va, ; ; ZVFHMIN-LABEL: vp_ceil_vv_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t @@ -723,7 +773,7 @@ define @vp_ceil_vv_nxv8f16_unmasked( %va, ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.ceil.nxv8f16( %va, splat (i1 true), i32 %evl) @@ -754,25 +804,27 @@ define @vp_ceil_vv_nxv16f16( %va, @llvm.vp.ceil.nxv16f16( %va, %m, i32 %evl) ret %v @@ -796,12 +848,12 @@ define @vp_ceil_vv_nxv16f16_unmasked( % ; ; ZVFHMIN-LABEL: vp_ceil_vv_nxv16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli 
zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t @@ -809,7 +861,7 @@ define @vp_ceil_vv_nxv16f16_unmasked( % ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.ceil.nxv16f16( %va, splat (i1 true), i32 %evl) @@ -846,59 +898,64 @@ define @vp_ceil_vv_nxv32f16( %va, @vp_ceil_vv_nxv32f16_unmasked( % ; ; ZVFHMIN-LABEL: vp_ceil_vv_nxv32f16_unmasked: ; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: lui a3, 307200 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 ; ZVFHMIN-NEXT: sub a3, a0, a1 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v12, v24, a2 +; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 +; ZVFHMIN-NEXT: vmv1r.v v17, v16 ; ZVFHMIN-NEXT: addi a2, a2, -1 ; ZVFHMIN-NEXT: and a2, a2, a3 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: addi a3, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v12, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a2, 3 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: fsrm a2 -; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB23_2: -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # 
Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 3 @@ -972,8 +1042,14 @@ define @vp_ceil_vv_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.ceil.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll index 5fe55583f314c..a9b255bb62aeb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll @@ -30,23 +30,27 @@ define <2 x half> @vp_ceil_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vp_ceil_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 -; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v11, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v @@ -70,12 +74,12 @@ define <2 x half> @vp_ceil_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ; ZVFHMIN-LABEL: vp_ceil_v2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -83,7 +87,7 @@ define <2 x half> 
@vp_ceil_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) @@ -112,23 +116,27 @@ define <4 x half> @vp_ceil_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vp_ceil_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 3 +; ZVFHMIN-NEXT: vmv.v.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 -; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v11, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -152,12 +160,12 @@ define <4 x half> @vp_ceil_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ; ZVFHMIN-LABEL: vp_ceil_v4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -165,7 +173,7 @@ define <4 x half> @vp_ceil_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) @@ -194,25 +202,27 @@ define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vp_ceil_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v9, v0 -; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 3 -; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -236,12 +246,12 @@ define <8 x half> @vp_ceil_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ; ZVFHMIN-LABEL: vp_ceil_v8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t @@ -249,7 +259,7 @@ define <8 x half> @vp_ceil_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) @@ -280,25 +290,27 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e ; ; ZVFHMIN-LABEL: vp_ceil_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v10, v0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 3 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t 
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -322,12 +334,12 @@ define <16 x half> @vp_ceil_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) { ; ; ZVFHMIN-LABEL: vp_ceil_v16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t @@ -335,7 +347,7 @@ define <16 x half> @vp_ceil_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll index 49255320c40a6..d500469003aea 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll @@ -30,23 +30,27 @@ define <2 x half> @vp_floor_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vp_floor_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 -; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v11, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.floor.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v @@ -70,12 +74,12 @@ define <2 x half> @vp_floor_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) 
{ ; ; ZVFHMIN-LABEL: vp_floor_v2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -83,7 +87,7 @@ define <2 x half> @vp_floor_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.floor.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) @@ -112,23 +116,27 @@ define <4 x half> @vp_floor_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vp_floor_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 2 +; ZVFHMIN-NEXT: vmv.v.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 -; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v11, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.floor.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -152,12 +160,12 @@ define <4 x half> @vp_floor_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ; ZVFHMIN-LABEL: vp_floor_v4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -165,7 +173,7 @@ define <4 x half> @vp_floor_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: 
vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.floor.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) @@ -194,25 +202,27 @@ define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vp_floor_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v9, v0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 2 -; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.floor.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -236,12 +246,12 @@ define <8 x half> @vp_floor_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ; ZVFHMIN-LABEL: vp_floor_v8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t @@ -249,7 +259,7 @@ define <8 x half> @vp_floor_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.floor.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) @@ -280,25 +290,27 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ; ZVFHMIN-LABEL: vp_floor_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v10, v0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: 
vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 2 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.floor.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -322,12 +334,12 @@ define <16 x half> @vp_floor_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vp_floor_v16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t @@ -335,7 +347,7 @@ define <16 x half> @vp_floor_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.floor.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll index 11f92555f56cd..4f11e6c3c386a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll @@ -26,22 +26,20 @@ define <2 x half> @vfmax_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i ; ; ZVFHMIN-LABEL: vfmax_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v10, v0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v9, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: 
vfmax.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.maximum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl) ret <2 x half> %v @@ -60,18 +58,18 @@ define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ; ZVFHMIN-LABEL: vfmax_vv_v2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 ; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.maximum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl) @@ -96,22 +94,20 @@ define <4 x half> @vfmax_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i ; ; ZVFHMIN-LABEL: vfmax_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v10, v0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v9, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.maximum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -130,18 +126,18 @@ define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ; ZVFHMIN-LABEL: vfmax_vv_v4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, 
mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 ; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.maximum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl) @@ -166,24 +162,22 @@ define <8 x half> @vfmax_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i ; ; ZVFHMIN-LABEL: vfmax_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v10, v0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0 +; ZVFHMIN-NEXT: vmerge.vvm v16, v14, v12, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v14, v12, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v14, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vfmax.vv v10, v8, v16, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vfmax.vv v12, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.maximum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -202,18 +196,18 @@ define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ; ZVFHMIN-LABEL: vfmax_vv_v8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.maximum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), i32 %evl) @@ -240,24 +234,22 @@ define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> ; ; ZVFHMIN-LABEL: vfmax_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: 
vmv1r.v v12, v0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v20, v0 +; ZVFHMIN-NEXT: vmerge.vvm v24, v20, v16, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v20, v16, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v20, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vfmax.vv v12, v8, v24, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vfmax.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.maximum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -276,18 +268,18 @@ define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ; ZVFHMIN-LABEL: vfmax_vv_v16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 ; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 ; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.maximum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll index 3fb586b67a21b..2e2103ad5e06d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll @@ -26,22 +26,20 @@ define <2 x half> @vfmin_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i ; ; ZVFHMIN-LABEL: vfmin_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v10, v0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9, v0.t +; ZVFHMIN-NEXT: 
vmerge.vvm v8, v9, v11, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v9, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.minimum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl) ret <2 x half> %v @@ -60,18 +58,18 @@ define <2 x half> @vfmin_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ; ZVFHMIN-LABEL: vfmin_vv_v2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 ; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.minimum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl) @@ -96,22 +94,20 @@ define <4 x half> @vfmin_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i ; ; ZVFHMIN-LABEL: vfmin_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v10, v0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v9, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.minimum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -130,18 +126,18 @@ define <4 x half> @vfmin_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ; ZVFHMIN-LABEL: vfmin_vv_v4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, 
mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 ; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.minimum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl) @@ -166,24 +162,22 @@ define <8 x half> @vfmin_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i ; ; ZVFHMIN-LABEL: vfmin_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v10, v0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0 +; ZVFHMIN-NEXT: vmerge.vvm v16, v14, v12, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v14, v12, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v14, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vfmin.vv v10, v8, v16, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vfmin.vv v12, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.minimum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -202,18 +196,18 @@ define <8 x half> @vfmin_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ; ZVFHMIN-LABEL: vfmin_vv_v8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.minimum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), 
i32 %evl) @@ -240,24 +234,22 @@ define <16 x half> @vfmin_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> ; ; ZVFHMIN-LABEL: vfmin_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v12, v0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v20, v0 +; ZVFHMIN-NEXT: vmerge.vvm v24, v20, v16, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v20, v16, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v20, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vfmin.vv v12, v8, v24, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vfmin.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.minimum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -276,18 +268,18 @@ define <16 x half> @vfmin_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ; ZVFHMIN-LABEL: vfmin_vv_v16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 ; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 ; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.minimum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll index 232a8a4827cb1..a4ff079846fd8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll @@ -30,23 +30,27 @@ define <2 x half> @vp_round_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vp_round_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: 
lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 -; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v11, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.round.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v @@ -70,12 +74,12 @@ define <2 x half> @vp_round_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ; ZVFHMIN-LABEL: vp_round_v2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -83,7 +87,7 @@ define <2 x half> @vp_round_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.round.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) @@ -112,23 +116,27 @@ define <4 x half> @vp_round_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vp_round_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 4 +; ZVFHMIN-NEXT: vmv.v.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 -; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v11, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: 
vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.round.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -152,12 +160,12 @@ define <4 x half> @vp_round_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ; ZVFHMIN-LABEL: vp_round_v4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -165,7 +173,7 @@ define <4 x half> @vp_round_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.round.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) @@ -194,25 +202,27 @@ define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vp_round_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v9, v0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.round.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -236,12 +246,12 @@ define <8 x half> @vp_round_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ; ZVFHMIN-LABEL: vp_round_v8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; 
ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t @@ -249,7 +259,7 @@ define <8 x half> @vp_round_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.round.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) @@ -280,25 +290,27 @@ define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ; ZVFHMIN-LABEL: vp_round_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v10, v0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.round.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -322,12 +334,12 @@ define <16 x half> @vp_round_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vp_round_v16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t @@ -335,7 +347,7 @@ define <16 x half> @vp_round_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.round.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll index 7c80c037403c2..c28d5fb1a8193 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll @@ -30,23 +30,27 @@ define <2 x half> @vp_roundeven_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext % ; ; ZVFHMIN-LABEL: vp_roundeven_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 -; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v11, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.roundeven.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v @@ -70,12 +74,12 @@ define <2 x half> @vp_roundeven_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vp_roundeven_v2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -83,7 +87,7 @@ define <2 x half> @vp_roundeven_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.roundeven.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) @@ -112,23 +116,27 @@ define <4 x half> @vp_roundeven_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext % ; ; ZVFHMIN-LABEL: vp_roundeven_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v 
v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 0 +; ZVFHMIN-NEXT: vmv.v.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 -; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v11, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.roundeven.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -152,12 +160,12 @@ define <4 x half> @vp_roundeven_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vp_roundeven_v4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -165,7 +173,7 @@ define <4 x half> @vp_roundeven_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.roundeven.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) @@ -194,25 +202,27 @@ define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext % ; ; ZVFHMIN-LABEL: vp_roundeven_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v9, v0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; 
ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.roundeven.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -236,12 +246,12 @@ define <8 x half> @vp_roundeven_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vp_roundeven_v8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t @@ -249,7 +259,7 @@ define <8 x half> @vp_roundeven_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.roundeven.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) @@ -280,25 +290,27 @@ define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe ; ; ZVFHMIN-LABEL: vp_roundeven_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v10, v0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.roundeven.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -322,12 +334,12 @@ define <16 x half> @vp_roundeven_v16f16_unmasked(<16 x half> %va, i32 zeroext %e ; ; ZVFHMIN-LABEL: vp_roundeven_v16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; 
ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t @@ -335,7 +347,7 @@ define <16 x half> @vp_roundeven_v16f16_unmasked(<16 x half> %va, i32 zeroext %e ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.roundeven.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll index 65a4725267cd3..64d3664a4c372 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll @@ -30,23 +30,27 @@ define <2 x half> @vp_roundtozero_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext ; ; ZVFHMIN-LABEL: vp_roundtozero_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 -; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v11, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v @@ -70,12 +74,12 @@ define <2 x half> @vp_roundtozero_v2f16_unmasked(<2 x half> %va, i32 zeroext %ev ; ; ZVFHMIN-LABEL: vp_roundtozero_v2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -83,7 +87,7 @@ define <2 x half> @vp_roundtozero_v2f16_unmasked(<2 x half> %va, i32 zeroext %ev ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, 
e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) @@ -112,23 +116,27 @@ define <4 x half> @vp_roundtozero_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext ; ; ZVFHMIN-LABEL: vp_roundtozero_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v9, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vmv.v.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 -; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v11, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -152,12 +160,12 @@ define <4 x half> @vp_roundtozero_v4f16_unmasked(<4 x half> %va, i32 zeroext %ev ; ; ZVFHMIN-LABEL: vp_roundtozero_v4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -165,7 +173,7 @@ define <4 x half> @vp_roundtozero_v4f16_unmasked(<4 x half> %va, i32 zeroext %ev ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) @@ -194,25 +202,27 @@ define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext ; ; ZVFHMIN-LABEL: vp_roundtozero_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v9, v0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; 
ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -236,12 +246,12 @@ define <8 x half> @vp_roundtozero_v8f16_unmasked(<8 x half> %va, i32 zeroext %ev ; ; ZVFHMIN-LABEL: vp_roundtozero_v8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t @@ -249,7 +259,7 @@ define <8 x half> @vp_roundtozero_v8f16_unmasked(<8 x half> %va, i32 zeroext %ev ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) @@ -280,25 +290,27 @@ define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zer ; ; ZVFHMIN-LABEL: vp_roundtozero_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v10, v0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: 
vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -322,12 +334,12 @@ define <16 x half> @vp_roundtozero_v16f16_unmasked(<16 x half> %va, i32 zeroext ; ; ZVFHMIN-LABEL: vp_roundtozero_v16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t @@ -335,7 +347,7 @@ define <16 x half> @vp_roundtozero_v16f16_unmasked(<16 x half> %va, i32 zeroext ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll index 7a7236235d120..f80c158324684 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll @@ -19,13 +19,13 @@ define <2 x half> @vfadd_vv_v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfadd_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl) ret <2 x half> %v @@ -40,12 +40,12 @@ define <2 x half> @vfadd_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, i32 ze ; ; ZVFHMIN-LABEL: vfadd_vv_v2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> splat (i1 true), i32 %evl) @@ -64,12 +64,13 @@ define <2 x half> @vfadd_vf_v2f16(<2 x half> %va, half %b, <2 x i1> %m, i32 zero ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v9, v10, v8, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer @@ -89,11 +90,12 @@ define <2 x half> @vfadd_vf_v2f16_unmasked(<2 x half> %va, half %b, i32 zeroext ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v9, v10, v8 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 @@ -113,13 +115,13 @@ define <3 x half> @vfadd_vv_v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfadd_vv_v3f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <3 x half> @llvm.vp.fadd.v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i32 %evl) ret <3 x half> %v @@ -136,13 +138,13 @@ define <4 x half> @vfadd_vv_v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfadd_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -157,12 +159,12 @@ define <4 x half> @vfadd_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, i32 ze ; ; ZVFHMIN-LABEL: vfadd_vv_v4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; 
ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> splat (i1 true), i32 %evl) @@ -181,12 +183,13 @@ define <4 x half> @vfadd_vf_v4f16(<4 x half> %va, half %b, <4 x i1> %m, i32 zero ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v9, v10, v8, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer @@ -206,11 +209,12 @@ define <4 x half> @vfadd_vf_v4f16_unmasked(<4 x half> %va, half %b, i32 zeroext ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v9, v10, v8 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 @@ -230,13 +234,13 @@ define <8 x half> @vfadd_vv_v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfadd_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v10, v12, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -251,12 +255,12 @@ define <8 x half> @vfadd_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, i32 ze ; ; ZVFHMIN-LABEL: vfadd_vv_v8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v10, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call <8 x half> 
@llvm.vp.fadd.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> splat (i1 true), i32 %evl) @@ -275,12 +279,13 @@ define <8 x half> @vfadd_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zero ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v10, v10, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -300,11 +305,12 @@ define <8 x half> @vfadd_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v10, v10, v12 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 @@ -324,13 +330,13 @@ define <16 x half> @vfadd_vv_v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> % ; ; ZVFHMIN-LABEL: vfadd_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v12, v16, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -345,12 +351,12 @@ define <16 x half> @vfadd_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, i3 ; ; ZVFHMIN-LABEL: vfadd_vv_v16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v12, v16, v12 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> splat (i1 true), i32 %evl) @@ -369,12 +375,13 @@ define <16 x half> @vfadd_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli 
zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v12, v12, v16, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer @@ -394,11 +401,12 @@ define <16 x half> @vfadd_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v12, v12, v16 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll index cb83e5ff4f2b3..23baa60de1532 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll @@ -19,13 +19,13 @@ define <2 x half> @vfdiv_vv_v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfdiv_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl) ret <2 x half> %v @@ -40,12 +40,12 @@ define <2 x half> @vfdiv_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, i32 ze ; ; ZVFHMIN-LABEL: vfdiv_vv_v2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> splat (i1 true), i32 %evl) @@ -64,12 +64,13 @@ define <2 x half> @vfdiv_vf_v2f16(<2 x half> %va, half %b, <2 x i1> %m, i32 zero ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v9, v10, v8, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer @@ -89,11 +90,12 @@ define <2 x half> @vfdiv_vf_v2f16_unmasked(<2 x half> %va, half %b, i32 zeroext ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v9, v10, v8 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 @@ -113,13 +115,13 @@ define <3 x half> @vfdiv_vv_v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfdiv_vv_v3f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <3 x half> @llvm.vp.fdiv.v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i32 %evl) ret <3 x half> %v @@ -136,13 +138,13 @@ define <4 x half> @vfdiv_vv_v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfdiv_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -157,12 +159,12 @@ define <4 x half> @vfdiv_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, i32 ze ; ; ZVFHMIN-LABEL: vfdiv_vv_v4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> splat (i1 true), i32 %evl) @@ -181,12 +183,13 @@ define <4 x half> @vfdiv_vf_v4f16(<4 x half> %va, half %b, <4 x i1> %m, i32 zero ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v9, v10, v8, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer @@ -206,11 +209,12 @@ define <4 x half> @vfdiv_vf_v4f16_unmasked(<4 x half> %va, half %b, i32 zeroext ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v9, v10, v8 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 @@ -230,13 +234,13 @@ define <8 x half> @vfdiv_vv_v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfdiv_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v10, v12, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -251,12 +255,12 @@ define <8 x half> @vfdiv_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, i32 ze ; ; ZVFHMIN-LABEL: vfdiv_vv_v8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v10, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; 
ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> splat (i1 true), i32 %evl) @@ -275,12 +279,13 @@ define <8 x half> @vfdiv_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zero ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -300,11 +305,12 @@ define <8 x half> @vfdiv_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v12 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 @@ -324,13 +330,13 @@ define <16 x half> @vfdiv_vv_v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> % ; ; ZVFHMIN-LABEL: vfdiv_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v12, v16, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -345,12 +351,12 @@ define <16 x half> @vfdiv_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, i3 ; ; ZVFHMIN-LABEL: vfdiv_vv_v16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v12, v16, v12 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> splat (i1 true), i32 %evl) @@ -369,12 +375,13 @@ define <16 x 
half> @vfdiv_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v16, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer @@ -394,11 +401,12 @@ define <16 x half> @vfdiv_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v16 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll index c61f9cd9b5bd7..bde842dcc7600 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll @@ -20,14 +20,14 @@ define <2 x half> @vfma_vv_v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, < ; ; ZVFHMIN-LABEL: vfma_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 %evl) ret <2 x half> %v @@ -42,13 +42,13 @@ define <2 x half> @vfma_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, <2 x ha ; ; ZVFHMIN-LABEL: vfma_vv_v2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w 
v8, v12 ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> splat (i1 true), i32 %evl) @@ -65,15 +65,17 @@ define <2 x half> @vfma_vf_v2f16(<2 x half> %va, half %b, <2 x half> %vc, <2 x i ; ZVFHMIN-LABEL: vfma_vf_v2f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer @@ -91,14 +93,16 @@ define <2 x half> @vfma_vf_v2f16_unmasked(<2 x half> %va, half %b, <2 x half> %v ; ZVFHMIN-LABEL: vfma_vf_v2f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v10 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 @@ -119,14 +123,14 @@ define <4 x half> @vfma_vv_v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, < ; ; ZVFHMIN-LABEL: vfma_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -141,13 +145,13 @@ define <4 x half> @vfma_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, <4 x ha ; ; ZVFHMIN-LABEL: vfma_vv_v4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v 
v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> splat (i1 true), i32 %evl) @@ -164,15 +168,17 @@ define <4 x half> @vfma_vf_v4f16(<4 x half> %va, half %b, <4 x half> %vc, <4 x i ; ZVFHMIN-LABEL: vfma_vf_v4f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer @@ -190,14 +196,16 @@ define <4 x half> @vfma_vf_v4f16_unmasked(<4 x half> %va, half %b, <4 x half> %v ; ZVFHMIN-LABEL: vfma_vf_v4f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v10 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 @@ -218,14 +226,14 @@ define <8 x half> @vfma_vv_v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, < ; ; ZVFHMIN-LABEL: vfma_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -240,13 +248,13 @@ define <8 x half> @vfma_vv_v8f16_unmasked(<8 x half> %va, <8 x half> 
%b, <8 x ha ; ; ZVFHMIN-LABEL: vfma_vv_v8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> splat (i1 true), i32 %evl) @@ -263,15 +271,17 @@ define <8 x half> @vfma_vf_v8f16(<8 x half> %va, half %b, <8 x half> %vc, <8 x i ; ZVFHMIN-LABEL: vfma_vf_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -289,14 +299,16 @@ define <8 x half> @vfma_vf_v8f16_unmasked(<8 x half> %va, half %b, <8 x half> %v ; ZVFHMIN-LABEL: vfma_vf_v8f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 @@ -317,14 +329,14 @@ define <16 x half> @vfma_vv_v16f16(<16 x half> %va, <16 x half> %b, <16 x half> ; ; ZVFHMIN-LABEL: vfma_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; 
ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -339,13 +351,13 @@ define <16 x half> @vfma_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, <16 ; ; ZVFHMIN-LABEL: vfma_vv_v16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20 ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> splat (i1 true), i32 %evl) @@ -362,15 +374,17 @@ define <16 x half> @vfma_vf_v16f16(<16 x half> %va, half %b, <16 x half> %vc, <1 ; ZVFHMIN-LABEL: vfma_vf_v16f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer @@ -388,14 +402,16 @@ define <16 x half> @vfma_vf_v16f16_unmasked(<16 x half> %va, half %b, <16 x half ; ZVFHMIN-LABEL: vfma_vf_v16f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll index cad7adbc19f3c..b37c47a32ba21 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll @@ -19,13 +19,13 @@ define <2 x half> @vfmax_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i ; ; ZVFHMIN-LABEL: vfmax_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.maxnum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl) ret <2 x half> %v @@ -40,12 +40,12 @@ define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ; ZVFHMIN-LABEL: vfmax_vv_v2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.maxnum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl) @@ -63,13 +63,13 @@ define <4 x half> @vfmax_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i ; ; ZVFHMIN-LABEL: vfmax_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.maxnum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -84,12 +84,12 @@ define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ; ZVFHMIN-LABEL: vfmax_vv_v4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.maxnum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl) @@ -107,13 +107,13 @@ define <8 x half> @vfmax_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i ; ; ZVFHMIN-LABEL: vfmax_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, 
v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmax.vv v10, v12, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.maxnum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -128,12 +128,12 @@ define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ; ZVFHMIN-LABEL: vfmax_vv_v8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmax.vv v10, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.maxnum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), i32 %evl) @@ -151,13 +151,13 @@ define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> ; ; ZVFHMIN-LABEL: vfmax_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmax.vv v12, v16, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.maxnum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -172,12 +172,12 @@ define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ; ZVFHMIN-LABEL: vfmax_vv_v16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmax.vv v12, v16, v12 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.maxnum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll index d8ee7a7044b49..261523e8ace50 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll @@ -19,13 +19,13 @@ define <2 x half> @vfmin_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i ; ; ZVFHMIN-LABEL: vfmin_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.minnum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl) ret <2 x half> %v @@ -40,12 +40,12 @@ define <2 x half> @vfmin_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ; ZVFHMIN-LABEL: vfmin_vv_v2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.minnum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl) @@ -63,13 +63,13 @@ define <4 x half> @vfmin_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i ; ; ZVFHMIN-LABEL: vfmin_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.minnum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -84,12 +84,12 @@ define <4 x half> @vfmin_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ; ZVFHMIN-LABEL: vfmin_vv_v4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.minnum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl) @@ -107,13 +107,13 @@ define <8 x half> @vfmin_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i ; ; ZVFHMIN-LABEL: vfmin_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmin.vv v10, v12, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, 
v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.minnum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -128,12 +128,12 @@ define <8 x half> @vfmin_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ; ZVFHMIN-LABEL: vfmin_vv_v8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmin.vv v10, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.minnum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), i32 %evl) @@ -151,13 +151,13 @@ define <16 x half> @vfmin_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> ; ; ZVFHMIN-LABEL: vfmin_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmin.vv v12, v16, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.minnum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -172,12 +172,12 @@ define <16 x half> @vfmin_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ; ZVFHMIN-LABEL: vfmin_vv_v16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmin.vv v12, v16, v12 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.minnum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll index 86f140723d7f8..7e03a3cf95577 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll @@ -19,13 +19,13 @@ define <2 x half> @vfmul_vv_v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfmul_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 
2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl) ret <2 x half> %v @@ -40,12 +40,12 @@ define <2 x half> @vfmul_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, i32 ze ; ; ZVFHMIN-LABEL: vfmul_vv_v2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> splat (i1 true), i32 %evl) @@ -64,12 +64,13 @@ define <2 x half> @vfmul_vf_v2f16(<2 x half> %va, half %b, <2 x i1> %m, i32 zero ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v9, v10, v8, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer @@ -89,11 +90,12 @@ define <2 x half> @vfmul_vf_v2f16_unmasked(<2 x half> %va, half %b, i32 zeroext ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v9, v10, v8 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 @@ -113,13 +115,13 @@ define <3 x half> @vfmul_vv_v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfmul_vv_v3f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <3 x half> @llvm.vp.fmul.v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i32 %evl) ret 
<3 x half> %v @@ -136,13 +138,13 @@ define <4 x half> @vfmul_vv_v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfmul_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -157,12 +159,12 @@ define <4 x half> @vfmul_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, i32 ze ; ; ZVFHMIN-LABEL: vfmul_vv_v4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> splat (i1 true), i32 %evl) @@ -181,12 +183,13 @@ define <4 x half> @vfmul_vf_v4f16(<4 x half> %va, half %b, <4 x i1> %m, i32 zero ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v9, v10, v8, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer @@ -206,11 +209,12 @@ define <4 x half> @vfmul_vf_v4f16_unmasked(<4 x half> %va, half %b, i32 zeroext ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v9, v10, v8 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 @@ -230,13 +234,13 @@ define <8 x half> @vfmul_vv_v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfmul_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v10, v12, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -251,12 +255,12 @@ define <8 x half> @vfmul_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, i32 ze ; ; ZVFHMIN-LABEL: vfmul_vv_v8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v10, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> splat (i1 true), i32 %evl) @@ -275,12 +279,13 @@ define <8 x half> @vfmul_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zero ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v10, v10, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -300,11 +305,12 @@ define <8 x half> @vfmul_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v10, v10, v12 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 @@ -324,13 +330,13 @@ define <16 x half> @vfmul_vv_v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> % ; ; ZVFHMIN-LABEL: vfmul_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, 
m4, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v12, v16, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -345,12 +351,12 @@ define <16 x half> @vfmul_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, i3 ; ; ZVFHMIN-LABEL: vfmul_vv_v16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v12, v16, v12 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> splat (i1 true), i32 %evl) @@ -369,12 +375,13 @@ define <16 x half> @vfmul_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v12, v12, v16, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer @@ -394,11 +401,12 @@ define <16 x half> @vfmul_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v12, v12, v16 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll index c1e63cbf0b138..6244419de65b1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll @@ -19,12 +19,12 @@ define <2 x half> @vfsqrt_vv_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl ; ; ZVFHMIN-LABEL: vfsqrt_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: 
vfsqrt.v v9, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.sqrt.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v @@ -39,11 +39,11 @@ define <2 x half> @vfsqrt_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ; ZVFHMIN-LABEL: vfsqrt_vv_v2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfsqrt.v v9, v9 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.sqrt.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) @@ -61,12 +61,12 @@ define <4 x half> @vfsqrt_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl ; ; ZVFHMIN-LABEL: vfsqrt_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfsqrt.v v9, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.sqrt.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -81,11 +81,11 @@ define <4 x half> @vfsqrt_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ; ZVFHMIN-LABEL: vfsqrt_vv_v4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfsqrt.v v9, v9 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.sqrt.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) @@ -103,12 +103,12 @@ define <8 x half> @vfsqrt_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl ; ; ZVFHMIN-LABEL: vfsqrt_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsqrt.v v10, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.sqrt.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -123,11 +123,11 @@ define <8 x half> @vfsqrt_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ; ZVFHMIN-LABEL: vfsqrt_vv_v8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, 
e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsqrt.v v10, v10 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.sqrt.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) @@ -145,12 +145,12 @@ define <16 x half> @vfsqrt_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext ; ; ZVFHMIN-LABEL: vfsqrt_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfsqrt.v v12, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.sqrt.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -165,11 +165,11 @@ define <16 x half> @vfsqrt_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vfsqrt_vv_v16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfsqrt.v v12, v12 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.sqrt.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll index d0a0bf516d355..58a510047d625 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll @@ -19,13 +19,13 @@ define <2 x half> @vfsub_vv_v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfsub_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl) ret <2 x half> %v @@ -40,12 +40,12 @@ define <2 x half> @vfsub_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, i32 ze ; ; ZVFHMIN-LABEL: vfsub_vv_v2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, 
e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> splat (i1 true), i32 %evl) @@ -64,12 +64,13 @@ define <2 x half> @vfsub_vf_v2f16(<2 x half> %va, half %b, <2 x i1> %m, i32 zero ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v9, v10, v8, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer @@ -89,11 +90,12 @@ define <2 x half> @vfsub_vf_v2f16_unmasked(<2 x half> %va, half %b, i32 zeroext ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v9, v10, v8 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 @@ -113,13 +115,13 @@ define <3 x half> @vfsub_vv_v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfsub_vv_v3f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <3 x half> @llvm.vp.fsub.v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i32 %evl) ret <3 x half> %v @@ -136,13 +138,13 @@ define <4 x half> @vfsub_vv_v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfsub_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, 
mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -157,12 +159,12 @@ define <4 x half> @vfsub_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, i32 ze ; ; ZVFHMIN-LABEL: vfsub_vv_v4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> splat (i1 true), i32 %evl) @@ -181,12 +183,13 @@ define <4 x half> @vfsub_vf_v4f16(<4 x half> %va, half %b, <4 x i1> %m, i32 zero ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v9, v10, v8, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer @@ -206,11 +209,12 @@ define <4 x half> @vfsub_vf_v4f16_unmasked(<4 x half> %va, half %b, i32 zeroext ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v9, v10, v8 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 @@ -230,13 +234,13 @@ define <8 x half> @vfsub_vv_v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfsub_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v10, v12, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -251,12 +255,12 @@ define <8 x half> @vfsub_vv_v8f16_unmasked(<8 x half> %va, <8 
x half> %b, i32 ze ; ; ZVFHMIN-LABEL: vfsub_vv_v8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v10, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> splat (i1 true), i32 %evl) @@ -275,12 +279,13 @@ define <8 x half> @vfsub_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zero ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v10, v10, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -300,11 +305,12 @@ define <8 x half> @vfsub_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v10, v10, v12 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 @@ -324,13 +330,13 @@ define <16 x half> @vfsub_vv_v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> % ; ; ZVFHMIN-LABEL: vfsub_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v12, v16, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -345,12 +351,12 @@ define <16 x half> @vfsub_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, i3 ; ; ZVFHMIN-LABEL: vfsub_vv_v16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, 
v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v12, v16, v12 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> splat (i1 true), i32 %evl) @@ -369,12 +375,13 @@ define <16 x half> @vfsub_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v12, v12, v16, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer @@ -394,11 +401,12 @@ define <16 x half> @vfsub_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v12, v12, v16 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll index 74a00c655d526..f9b5095c9af1d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -17,23 +17,27 @@ declare @llvm.vp.floor.nxv1bf16(, @vp_floor_nxv1bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv1bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vfabs.v v8, v9, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v11, v10, v0.t +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a0, zero, 
e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv1bf16( %va, %m, i32 %evl) ret %v @@ -42,12 +46,12 @@ define @vp_floor_nxv1bf16( %va, @vp_floor_nxv1bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv1bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -55,7 +59,7 @@ define @vp_floor_nxv1bf16_unmasked( % ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv1bf16( %va, splat (i1 true), i32 %evl) @@ -67,23 +71,27 @@ declare @llvm.vp.floor.nxv2bf16(, @vp_floor_nxv2bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv2bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfabs.v v8, v9, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v11, v10, v0.t +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv2bf16( %va, %m, i32 %evl) ret %v @@ -92,12 +100,12 @@ define @vp_floor_nxv2bf16( %va, @vp_floor_nxv2bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv2bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; 
CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -105,7 +113,7 @@ define @vp_floor_nxv2bf16_unmasked( % ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv2bf16( %va, splat (i1 true), i32 %evl) @@ -117,25 +125,27 @@ declare @llvm.vp.floor.nxv4bf16(, @vp_floor_nxv4bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv4bf16( %va, %m, i32 %evl) ret %v @@ -144,12 +154,12 @@ define @vp_floor_nxv4bf16( %va, @vp_floor_nxv4bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t @@ -157,7 +167,7 @@ define @vp_floor_nxv4bf16_unmasked( % ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv4bf16( %va, splat (i1 true), i32 %evl) @@ -169,25 +179,27 @@ declare @llvm.vp.floor.nxv8bf16(, @vp_floor_nxv8bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v12, v0.t -; 
CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv8bf16( %va, %m, i32 %evl) ret %v @@ -196,12 +208,12 @@ define @vp_floor_nxv8bf16( %va, @vp_floor_nxv8bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t @@ -209,7 +221,7 @@ define @vp_floor_nxv8bf16_unmasked( % ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv8bf16( %va, splat (i1 true), i32 %evl) @@ -221,25 +233,27 @@ declare @llvm.vp.floor.nxv16bf16(, define @vp_floor_nxv16bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv16bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.floor.nxv16bf16( %va, %m, i32 %evl) ret %v @@ -248,12 +262,12 @@ define @vp_floor_nxv16bf16( %va, @vp_floor_nxv16bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv16bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, 
ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t @@ -261,7 +275,7 @@ define @vp_floor_nxv16bf16_unmasked( @llvm.vp.floor.nxv16bf16( %va, splat (i1 true), i32 %evl) @@ -279,59 +293,64 @@ define @vp_floor_nxv32bf16( %va, @vp_floor_nxv32bf16( %va, @vp_floor_nxv32bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv32bf16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vmset.m v24 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: lui a3, 307200 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 ; CHECK-NEXT: sub a3, a0, a1 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v12, v24, a2 +; CHECK-NEXT: vslidedown.vx v16, v16, a2 ; CHECK-NEXT: sltu a2, a0, a3 +; CHECK-NEXT: vmv1r.v v17, v16 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 2 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB11_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB11_2: -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a0, 2 @@ -390,8 +422,14 @@ define @vp_floor_nxv32bf16_unmasked( @llvm.vp.floor.nxv32bf16( %va, splat (i1 true), i32 
%evl) ret %v @@ -418,23 +456,27 @@ define @vp_floor_nxv1f16( %va, @llvm.vp.floor.nxv1f16( %va, %m, i32 %evl) ret %v @@ -458,12 +500,12 @@ define @vp_floor_nxv1f16_unmasked( %va, i ; ; ZVFHMIN-LABEL: vp_floor_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -471,7 +513,7 @@ define @vp_floor_nxv1f16_unmasked( %va, i ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.floor.nxv1f16( %va, splat (i1 true), i32 %evl) @@ -500,23 +542,27 @@ define @vp_floor_nxv2f16( %va, @llvm.vp.floor.nxv2f16( %va, %m, i32 %evl) ret %v @@ -540,12 +586,12 @@ define @vp_floor_nxv2f16_unmasked( %va, i ; ; ZVFHMIN-LABEL: vp_floor_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -553,7 +599,7 @@ define @vp_floor_nxv2f16_unmasked( %va, i ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.floor.nxv2f16( %va, splat (i1 true), i32 %evl) @@ -582,25 +628,27 @@ define @vp_floor_nxv4f16( %va, @llvm.vp.floor.nxv4f16( %va, %m, i32 %evl) ret %v @@ -624,12 +672,12 @@ define @vp_floor_nxv4f16_unmasked( %va, i ; ; ZVFHMIN-LABEL: vp_floor_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t @@ -637,7 +685,7 @@ define @vp_floor_nxv4f16_unmasked( %va, i ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.floor.nxv4f16( %va, splat (i1 true), i32 %evl) @@ -668,25 +716,27 @@ 
define @vp_floor_nxv8f16( %va, @llvm.vp.floor.nxv8f16( %va, %m, i32 %evl) ret %v @@ -710,12 +760,12 @@ define @vp_floor_nxv8f16_unmasked( %va, i ; ; ZVFHMIN-LABEL: vp_floor_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t @@ -723,7 +773,7 @@ define @vp_floor_nxv8f16_unmasked( %va, i ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.floor.nxv8f16( %va, splat (i1 true), i32 %evl) @@ -754,25 +804,27 @@ define @vp_floor_nxv16f16( %va, @llvm.vp.floor.nxv16f16( %va, %m, i32 %evl) ret %v @@ -796,12 +848,12 @@ define @vp_floor_nxv16f16_unmasked( %va ; ; ZVFHMIN-LABEL: vp_floor_nxv16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t @@ -809,7 +861,7 @@ define @vp_floor_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.floor.nxv16f16( %va, splat (i1 true), i32 %evl) @@ -846,59 +898,64 @@ define @vp_floor_nxv32f16( %va, @vp_floor_nxv32f16_unmasked( %va ; ; ZVFHMIN-LABEL: vp_floor_nxv32f16_unmasked: ; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: lui a3, 307200 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 ; ZVFHMIN-NEXT: sub a3, a0, a1 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v12, v24, a2 +; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 +; ZVFHMIN-NEXT: vmv1r.v v17, v16 ; ZVFHMIN-NEXT: addi a2, a2, -1 ; ZVFHMIN-NEXT: and a2, a2, a3 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfabs.v v24, 
v16, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: addi a3, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v12, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a2, 2 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: fsrm a2 -; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB23_2: -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 2 @@ -972,8 +1042,14 @@ define @vp_floor_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.floor.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll index 7e0c3f45de463..d56e46f7db3ab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll @@ -21,20 +21,18 @@ define @vfmax_vv_nxv1bf16( %va, @llvm.vp.maximum.nxv1bf16( %va, %vb, %m, i32 %evl) ret %v @@ -54,7 +52,7 @@ define @vfmax_vv_nxv1bf16_unmasked( % ; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: vfmax.vv v9, v8, v9 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv1bf16( %va, %vb, splat (i1 true), i32 %evl) @@ -68,20 +66,18 @@ define @vfmax_vv_nxv2bf16( %va, @llvm.vp.maximum.nxv2bf16( %va, %vb, %m, i32 %evl) ret %v @@ -101,7 +97,7 @@ define @vfmax_vv_nxv2bf16_unmasked( % ; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: vfmax.vv v9, v8, v9 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call 
@llvm.vp.maximum.nxv2bf16( %va, %vb, splat (i1 true), i32 %evl) @@ -115,22 +111,20 @@ define @vfmax_vv_nxv4bf16( %va, @llvm.vp.maximum.nxv4bf16( %va, %vb, %m, i32 %evl) ret %v @@ -150,7 +144,7 @@ define @vfmax_vv_nxv4bf16_unmasked( % ; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v10, v12, v10, v0 ; CHECK-NEXT: vfmax.vv v10, v10, v8 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv4bf16( %va, %vb, splat (i1 true), i32 %evl) @@ -164,22 +158,20 @@ define @vfmax_vv_nxv8bf16( %va, @llvm.vp.maximum.nxv8bf16( %va, %vb, %m, i32 %evl) ret %v @@ -199,7 +191,7 @@ define @vfmax_vv_nxv8bf16_unmasked( % ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v12, v16, v12, v0 ; CHECK-NEXT: vfmax.vv v12, v12, v8 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv8bf16( %va, %vb, splat (i1 true), i32 %evl) @@ -214,32 +206,58 @@ define @vfmax_vv_nxv16bf16( %va, @vfmax_vv_nxv16bf16_unmasked( @vfmax_vv_nxv32bf16( %va, @vfmax_vv_nxv32bf16_unmasked( @vfmax_vv_nxv32bf16_unmasked( @vfmax_vv_nxv1f16( %va, @llvm.vp.maximum.nxv1f16( %va, %vb, %m, i32 %evl) ret %v @@ -633,7 +643,7 @@ define @vfmax_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 ; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.maximum.nxv1f16( %va, %vb, splat (i1 true), i32 %evl) @@ -660,20 +670,18 @@ define @vfmax_vv_nxv2f16( %va, @llvm.vp.maximum.nxv2f16( %va, %vb, %m, i32 %evl) ret %v @@ -703,7 +711,7 @@ define @vfmax_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 ; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.maximum.nxv2f16( %va, %vb, splat (i1 true), i32 %evl) @@ -730,22 +738,20 @@ define @vfmax_vv_nxv4f16( %va, @llvm.vp.maximum.nxv4f16( %va, %vb, %m, i32 %evl) ret %v @@ -775,7 +781,7 @@ define @vfmax_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.maximum.nxv4f16( %va, %vb, splat (i1 true), i32 %evl) @@ -804,22 +810,20 @@ define @vfmax_vv_nxv8f16( %va, @llvm.vp.maximum.nxv8f16( %va, %vb, %m, i32 %evl) ret %v @@ -849,7 +853,7 @@ define @vfmax_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 ; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 ; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.maximum.nxv8f16( %va, %vb, splat (i1 true), i32 %evl) @@ -879,32 +883,58 @@ define @vfmax_vv_nxv16f16( %va, @vfmax_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 
0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 -; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24 -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFHMIN-NEXT: vmfeq.vv v7, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 ; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 @@ -997,147 +1027,136 @@ define @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x19, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 25 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmv8r.v v0, v8 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: li a2, 24 +; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: vmset.m v24 +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: sub a3, a0, a1 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v8, v24, a2 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vslidedown.vx v24, v16, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: addi a2, a2, -1 ; ZVFHMIN-NEXT: and a2, a2, a3 +; ZVFHMIN-NEXT: vmv1r.v v0, v24 ; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: slli a3, a3, 3 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t -; ZVFHMIN-NEXT: vmv8r.v v0, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a3, a2, 4 -; ZVFHMIN-NEXT: add a2, a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, 
v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v25, v16, v16, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a3, a2, 3 -; ZVFHMIN-NEXT: add a2, a3, a2 +; ZVFHMIN-NEXT: li a3, 24 +; ZVFHMIN-NEXT: mul a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v25 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0 ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl1r.v v13, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmv1r.v v0, v13 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv1r.v v0, v24 ; ZVFHMIN-NEXT: vmfeq.vv v12, v16, v16, v0.t ; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a3, a2, 3 -; ZVFHMIN-NEXT: add a2, a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v24 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfmax.vv v16, v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a3, a2, 3 -; ZVFHMIN-NEXT: add a2, a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill @@ -1236,48 +1261,47 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB23_2: ; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a1, a0, 4 -; ZVFHMIN-NEXT: add a0, a1, a0 +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v7, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24 +; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a1, a0, 4 -; ZVFHMIN-NEXT: add a0, a1, a0 +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v 
v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a1, a0, 4 -; ZVFHMIN-NEXT: add a0, a1, a0 +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a1, a0, 3 -; ZVFHMIN-NEXT: add a0, a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 25 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 ; ZVFHMIN-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll index d877073c3487d..81e4a548f560e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll @@ -21,20 +21,18 @@ define @vfmin_vv_nxv1bf16( %va, @llvm.vp.minimum.nxv1bf16( %va, %vb, %m, i32 %evl) ret %v @@ -54,7 +52,7 @@ define @vfmin_vv_nxv1bf16_unmasked( % ; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: vfmin.vv v9, v8, v9 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv1bf16( %va, %vb, splat (i1 true), i32 %evl) @@ -68,20 +66,18 @@ define @vfmin_vv_nxv2bf16( %va, @llvm.vp.minimum.nxv2bf16( %va, %vb, %m, i32 %evl) ret %v @@ -101,7 +97,7 @@ define @vfmin_vv_nxv2bf16_unmasked( % ; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: vfmin.vv v9, v8, v9 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv2bf16( %va, %vb, splat (i1 true), i32 %evl) @@ -115,22 +111,20 @@ define @vfmin_vv_nxv4bf16( %va, @llvm.vp.minimum.nxv4bf16( %va, %vb, %m, i32 %evl) ret %v @@ -150,7 +144,7 @@ define @vfmin_vv_nxv4bf16_unmasked( % ; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v10, v12, v10, v0 ; CHECK-NEXT: vfmin.vv v10, v10, v8 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv4bf16( %va, %vb, splat (i1 true), i32 %evl) @@ -164,22 +158,20 @@ define @vfmin_vv_nxv8bf16( %va, @llvm.vp.minimum.nxv8bf16( %va, %vb, %m, i32 %evl) ret %v @@ -199,7 +191,7 @@ define @vfmin_vv_nxv8bf16_unmasked( % ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v12, v16, v12, v0 ; CHECK-NEXT: vfmin.vv v12, v12, v8 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv8bf16( %va, %vb, splat (i1 true), i32 %evl) @@ -214,32 +206,58 @@ define @vfmin_vv_nxv16bf16( %va, @vfmin_vv_nxv16bf16_unmasked( @vfmin_vv_nxv32bf16( %va, 
@vfmin_vv_nxv32bf16_unmasked( @vfmin_vv_nxv32bf16_unmasked( @vfmin_vv_nxv1f16( %va, @llvm.vp.minimum.nxv1f16( %va, %vb, %m, i32 %evl) ret %v @@ -633,7 +643,7 @@ define @vfmin_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 ; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.minimum.nxv1f16( %va, %vb, splat (i1 true), i32 %evl) @@ -660,20 +670,18 @@ define @vfmin_vv_nxv2f16( %va, @llvm.vp.minimum.nxv2f16( %va, %vb, %m, i32 %evl) ret %v @@ -703,7 +711,7 @@ define @vfmin_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 ; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.minimum.nxv2f16( %va, %vb, splat (i1 true), i32 %evl) @@ -730,22 +738,20 @@ define @vfmin_vv_nxv4f16( %va, @llvm.vp.minimum.nxv4f16( %va, %vb, %m, i32 %evl) ret %v @@ -775,7 +781,7 @@ define @vfmin_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.minimum.nxv4f16( %va, %vb, splat (i1 true), i32 %evl) @@ -804,22 +810,20 @@ define @vfmin_vv_nxv8f16( %va, @llvm.vp.minimum.nxv8f16( %va, %vb, %m, i32 %evl) ret %v @@ -849,7 +853,7 @@ define @vfmin_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 ; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 ; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.minimum.nxv8f16( %va, %vb, splat (i1 true), i32 %evl) @@ -879,32 +883,58 @@ define @vfmin_vv_nxv16f16( %va, @vfmin_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 -; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24 -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFHMIN-NEXT: vmfeq.vv v7, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 ; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 @@ -997,147 +1027,136 @@ define @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: 
.cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x19, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 25 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmv8r.v v0, v8 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: li a2, 24 +; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: vmset.m v24 +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: sub a3, a0, a1 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v8, v24, a2 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vslidedown.vx v24, v16, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: addi a2, a2, -1 ; ZVFHMIN-NEXT: and a2, a2, a3 +; ZVFHMIN-NEXT: vmv1r.v v0, v24 ; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: slli a3, a3, 3 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t -; ZVFHMIN-NEXT: vmv8r.v v0, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a3, a2, 4 -; ZVFHMIN-NEXT: add a2, a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v25, v16, v16, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a3, a2, 3 -; ZVFHMIN-NEXT: add a2, a3, a2 +; ZVFHMIN-NEXT: li a3, 24 +; ZVFHMIN-NEXT: mul a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v25 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0 ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl1r.v v13, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmv1r.v v0, v13 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv1r.v v0, v24 ; ZVFHMIN-NEXT: vmfeq.vv v12, v16, v16, v0.t ; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; 
ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a3, a2, 3 -; ZVFHMIN-NEXT: add a2, a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v24 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfmin.vv v16, v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a3, a2, 3 -; ZVFHMIN-NEXT: add a2, a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill @@ -1236,48 +1261,47 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB23_2: ; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a1, a0, 4 -; ZVFHMIN-NEXT: add a0, a1, a0 +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v7, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24 +; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a1, a0, 4 -; ZVFHMIN-NEXT: add a0, a1, a0 +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a1, a0, 4 -; ZVFHMIN-NEXT: add a0, a1, a0 +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a1, a0, 3 -; ZVFHMIN-NEXT: add a0, a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 25 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 ; ZVFHMIN-NEXT: addi sp, sp, 16 diff --git 
a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll index 9aa26e59c6a03..7d3700492ea7b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -17,22 +17,26 @@ declare @llvm.vp.nearbyint.nxv1bf16(, define @vp_nearbyint_nxv1bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv1bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vfabs.v v8, v9, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v11, v10, v0.t +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv1bf16( %va, %m, i32 %evl) @@ -42,19 +46,19 @@ define @vp_nearbyint_nxv1bf16( %va, < define @vp_nearbyint_nxv1bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv1bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret @@ -67,22 +71,26 @@ declare @llvm.vp.nearbyint.nxv2bf16(, define @vp_nearbyint_nxv2bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv2bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfabs.v v8, v9, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v11, v10, v0.t +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, 
e32, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv2bf16( %va, %m, i32 %evl) @@ -92,19 +100,19 @@ define @vp_nearbyint_nxv2bf16( %va, < define @vp_nearbyint_nxv2bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv2bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret @@ -117,24 +125,26 @@ declare @llvm.vp.nearbyint.nxv4bf16(, define @vp_nearbyint_nxv4bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv4bf16( %va, %m, i32 %evl) @@ -144,19 +154,19 @@ define @vp_nearbyint_nxv4bf16( %va, < define @vp_nearbyint_nxv4bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv4bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; 
CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret @@ -169,24 +179,26 @@ declare @llvm.vp.nearbyint.nxv8bf16(, define @vp_nearbyint_nxv8bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v12, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv8bf16( %va, %m, i32 %evl) @@ -196,19 +208,19 @@ define @vp_nearbyint_nxv8bf16( %va, < define @vp_nearbyint_nxv8bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv8bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret @@ -221,24 +233,26 @@ declare @llvm.vp.nearbyint.nxv16bf16( @vp_nearbyint_nxv16bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv16bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, 
ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv16bf16( %va, %m, i32 %evl) @@ -248,19 +262,19 @@ define @vp_nearbyint_nxv16bf16( %va define @vp_nearbyint_nxv16bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv16bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret @@ -273,55 +287,76 @@ declare @llvm.vp.nearbyint.nxv32bf16( @vp_nearbyint_nxv32bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv32bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 ; CHECK-NEXT: lui a3, 307200 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 ; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v12, v0, a2 +; CHECK-NEXT: vslidedown.vx v17, v0, a2 ; CHECK-NEXT: sltu a2, a0, a3 +; CHECK-NEXT: vmv1r.v v18, v17 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, 
ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v18, v8, fa5, v0.t ; CHECK-NEXT: frflags a2 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmv1r.v v0, v18 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: fsflags a2 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB10_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB10_2: -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t +; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t ; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa sp, 16 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv32bf16( %va, %m, i32 %evl) ret %v @@ -330,42 +365,55 @@ define @vp_nearbyint_nxv32bf16( %va define @vp_nearbyint_nxv32bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv32bf16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vmset.m v24 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: lui a3, 307200 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 ; CHECK-NEXT: sub a3, a0, a1 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v12, v24, a2 +; CHECK-NEXT: vslidedown.vx v16, v16, a2 ; CHECK-NEXT: sltu a2, a0, a3 +; CHECK-NEXT: vmv1r.v v17, v16 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: addi a3, 
sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t ; CHECK-NEXT: frflags a2 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: fsflags a2 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB11_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB11_2: -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: frflags a0 @@ -373,9 +421,15 @@ define @vp_nearbyint_nxv32bf16_unmasked( @llvm.vp.nearbyint.nxv32bf16( %va, splat (i1 true), i32 %evl) ret %v @@ -402,22 +456,26 @@ define @vp_nearbyint_nxv1f16( %va, @llvm.vp.nearbyint.nxv1f16( %va, %m, i32 %evl) @@ -442,19 +500,19 @@ define @vp_nearbyint_nxv1f16_unmasked( %v ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret @@ -484,22 +542,26 @@ define @vp_nearbyint_nxv2f16( %va, @llvm.vp.nearbyint.nxv2f16( %va, %m, i32 %evl) @@ -524,19 +586,19 @@ define @vp_nearbyint_nxv2f16_unmasked( %v ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; 
ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret @@ -566,24 +628,26 @@ define @vp_nearbyint_nxv4f16( %va, @llvm.vp.nearbyint.nxv4f16( %va, %m, i32 %evl) @@ -608,19 +672,19 @@ define @vp_nearbyint_nxv4f16_unmasked( %v ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret @@ -652,24 +716,26 @@ define @vp_nearbyint_nxv8f16( %va, @llvm.vp.nearbyint.nxv8f16( %va, %m, i32 %evl) @@ -694,19 +760,19 @@ define @vp_nearbyint_nxv8f16_unmasked( %v ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret @@ -738,24 +804,26 @@ define @vp_nearbyint_nxv16f16( %va, @llvm.vp.nearbyint.nxv16f16( %va, %m, i32 %evl) @@ -780,19 +848,19 @@ define @vp_nearbyint_nxv16f16_unmasked( ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret @@ 
-824,55 +892,76 @@ define @vp_nearbyint_nxv32f16( %va, @llvm.vp.nearbyint.nxv32f16( %va, %m, i32 %evl) ret %v @@ -896,42 +985,55 @@ define @vp_nearbyint_nxv32f16_unmasked( ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv32f16_unmasked: ; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: lui a3, 307200 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 ; ZVFHMIN-NEXT: sub a3, a0, a1 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v12, v24, a2 +; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 +; ZVFHMIN-NEXT: vmv1r.v v17, v16 ; ZVFHMIN-NEXT: addi a2, a2, -1 ; ZVFHMIN-NEXT: and a2, a2, a3 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: addi a3, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v12, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t ; ZVFHMIN-NEXT: frflags a2 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t -; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: fsflags a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB23_2: -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 ; ZVFHMIN-NEXT: frflags a0 @@ -939,9 +1041,15 @@ define @vp_nearbyint_nxv32f16_unmasked( ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: fsflags a0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 +; 
ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll index 70ea1bc78d2e5..f044eceb9f930 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll @@ -17,21 +17,25 @@ declare @llvm.vp.rint.nxv1bf16(, @vp_rint_nxv1bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv1bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vfabs.v v8, v9, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v11, v10, v0.t +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.rint.nxv1bf16( %va, %m, i32 %evl) ret %v @@ -40,18 +44,18 @@ define @vp_rint_nxv1bf16( %va, @vp_rint_nxv1bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv1bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.rint.nxv1bf16( %va, splat (i1 true), i32 %evl) @@ -63,21 +67,25 @@ declare @llvm.vp.rint.nxv2bf16(, @vp_rint_nxv2bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv2bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfabs.v v8, v9, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v11, v10, v0.t +; 
CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t +; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.rint.nxv2bf16( %va, %m, i32 %evl) ret %v @@ -86,18 +94,18 @@ define @vp_rint_nxv2bf16( %va, @vp_rint_nxv2bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv2bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.rint.nxv2bf16( %va, splat (i1 true), i32 %evl) @@ -109,23 +117,25 @@ declare @llvm.vp.rint.nxv4bf16(, @vp_rint_nxv4bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t -; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v8, v12, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.rint.nxv4bf16( %va, %m, i32 %evl) ret %v @@ -134,18 +144,18 @@ define @vp_rint_nxv4bf16( %va, @vp_rint_nxv4bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv4bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, 
a0, e32, m2, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.rint.nxv4bf16( %va, splat (i1 true), i32 %evl) @@ -157,23 +167,25 @@ declare @llvm.vp.rint.nxv8bf16(, @vp_rint_nxv8bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v12, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.rint.nxv8bf16( %va, %m, i32 %evl) ret %v @@ -182,18 +194,18 @@ define @vp_rint_nxv8bf16( %va, @vp_rint_nxv8bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv8bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vp.rint.nxv8bf16( %va, splat (i1 true), i32 %evl) @@ -205,23 +217,25 @@ declare @llvm.vp.rint.nxv16bf16(, < define @vp_rint_nxv16bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv16bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 
; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v8, v24, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.rint.nxv16bf16( %va, %m, i32 %evl) ret %v @@ -230,18 +244,18 @@ define @vp_rint_nxv16bf16( %va, @vp_rint_nxv16bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv16bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 ; CHECK-NEXT: ret %v = call @llvm.vp.rint.nxv16bf16( %va, splat (i1 true), i32 %evl) @@ -259,54 +273,60 @@ define @vp_rint_nxv32bf16( %va, @vp_rint_nxv32bf16( %va, @vp_rint_nxv32bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv32bf16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vmset.m v24 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: lui a3, 307200 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 ; CHECK-NEXT: sub a3, a0, a1 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v12, v24, a2 +; CHECK-NEXT: vslidedown.vx v16, v16, a2 ; CHECK-NEXT: sltu a2, a0, a3 +; CHECK-NEXT: vmv1r.v v17, v16 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: 
vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB11_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB11_2: -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa sp, 16 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = call @llvm.vp.rint.nxv32bf16( %va, splat (i1 true), i32 %evl) ret %v @@ -387,21 +426,25 @@ define @vp_rint_nxv1f16( %va, @llvm.vp.rint.nxv1f16( %va, %m, i32 %evl) ret %v @@ -423,18 +466,18 @@ define @vp_rint_nxv1f16_unmasked( %va, i3 ; ; ZVFHMIN-LABEL: vp_rint_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv1f16( %va, splat (i1 true), i32 %evl) @@ -461,21 +504,25 @@ define @vp_rint_nxv2f16( %va, @llvm.vp.rint.nxv2f16( %va, %m, i32 %evl) ret %v @@ -497,18 +544,18 @@ define @vp_rint_nxv2f16_unmasked( %va, i3 ; ; ZVFHMIN-LABEL: vp_rint_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; 
ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv2f16( %va, splat (i1 true), i32 %evl) @@ -535,23 +582,25 @@ define @vp_rint_nxv4f16( %va, @llvm.vp.rint.nxv4f16( %va, %m, i32 %evl) ret %v @@ -573,18 +622,18 @@ define @vp_rint_nxv4f16_unmasked( %va, i3 ; ; ZVFHMIN-LABEL: vp_rint_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv4f16( %va, splat (i1 true), i32 %evl) @@ -613,23 +662,25 @@ define @vp_rint_nxv8f16( %va, @llvm.vp.rint.nxv8f16( %va, %m, i32 %evl) ret %v @@ -651,18 +702,18 @@ define @vp_rint_nxv8f16_unmasked( %va, i3 ; ; ZVFHMIN-LABEL: vp_rint_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv8f16( %va, splat (i1 true), i32 %evl) @@ -691,23 +742,25 @@ define @vp_rint_nxv16f16( %va, @llvm.vp.rint.nxv16f16( %va, %m, i32 %evl) ret %v @@ -729,18 +782,18 @@ define @vp_rint_nxv16f16_unmasked( %va, ; ; ZVFHMIN-LABEL: vp_rint_nxv16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv16f16( %va, splat (i1 true), i32 %evl) @@ -775,54 +828,60 @@ define @vp_rint_nxv32f16( 
%va, @vp_rint_nxv32f16_unmasked( %va, ; ; ZVFHMIN-LABEL: vp_rint_nxv32f16_unmasked: ; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: lui a3, 307200 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 ; ZVFHMIN-NEXT: sub a3, a0, a1 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v12, v24, a2 +; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 +; ZVFHMIN-NEXT: vmv1r.v v17, v16 ; ZVFHMIN-NEXT: addi a2, a2, -1 ; ZVFHMIN-NEXT: and a2, a2, a3 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: addi a3, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v12, v24, fa5, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t -; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB23_2: -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.rint.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll index 
4e15f13570583..39744dcecd718 100644
--- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
@@ -17,23 +17,27 @@ declare <vscale x 1 x bfloat> @llvm.vp.round.nxv1bf16(<vscale x 1 x bfloat>, <vs
 define <vscale x 1 x bfloat> @vp_round_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    lui a1, 307200
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfabs.v v8, v9, v0.t
-; CHECK-NEXT:    fmv.w.x fa5, a1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
+; CHECK-NEXT:    lui a0, 307200
+; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vfabs.v v11, v10, v0.t
+; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v8, v11, fa5, v0.t
 ; CHECK-NEXT:    fsrmi a0, 4
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT:    vfcvt.x.f.v v11, v10, v0.t
 ; CHECK-NEXT:    fsrm a0
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v11, v11, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfsgnj.vv v10, v11, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.round.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x bfloat> %v
@@ -42,12 +46,12 @@ define <vscale x 1 x bfloat> @vp_round_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 define <vscale x 1 x bfloat> @vp_round_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_nxv1bf16_unmasked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    lui a1, 307200
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    lui a0, 307200
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vfabs.v v8, v9
-; CHECK-NEXT:    fmv.w.x fa5, a1
+; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vmflt.vf v0, v8, fa5
 ; CHECK-NEXT:    fsrmi a0, 4
 ; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
@@ -55,7 +59,7 @@ define <vscale x 1 x bfloat> @vp_round_nxv1bf16_unmasked(<vscale x 1 x bfloat> %
 ; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
-; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT:    ret
   %v = call <vscale x 1 x bfloat> @llvm.vp.round.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
@@ -67,23 +71,27 @@ declare <vscale x 2 x bfloat> @llvm.vp.round.nxv2bf16(<vscale x 2 x bfloat>, <vs
 define <vscale x 2 x bfloat> @vp_round_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_nxv2bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    lui a1, 307200
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfabs.v v8, v9, v0.t
-; CHECK-NEXT:    fmv.w.x fa5, a1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
+; CHECK-NEXT:    lui a0, 307200
+; CHECK-NEXT:    vmv1r.v v8, v0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vfabs.v v11, v10, v0.t
+; CHECK-NEXT:    fmv.w.x fa5, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, fa5, v0.t
+; 
CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv2bf16( %va, %m, i32 %evl) ret %v @@ -92,12 +100,12 @@ define @vp_round_nxv2bf16( %va, @vp_round_nxv2bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv2bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -105,7 +113,7 @@ define @vp_round_nxv2bf16_unmasked( % ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv2bf16( %va, splat (i1 true), i32 %evl) @@ -117,25 +125,27 @@ declare @llvm.vp.round.nxv4bf16(, @vp_round_nxv4bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv4bf16( %va, %m, i32 %evl) ret %v @@ -144,12 +154,12 @@ define @vp_round_nxv4bf16( %va, @vp_round_nxv4bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv4bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: 
vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t @@ -157,7 +167,7 @@ define @vp_round_nxv4bf16_unmasked( % ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv4bf16( %va, splat (i1 true), i32 %evl) @@ -169,25 +179,27 @@ declare @llvm.vp.round.nxv8bf16(, @vp_round_nxv8bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v12, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv8bf16( %va, %m, i32 %evl) ret %v @@ -196,12 +208,12 @@ define @vp_round_nxv8bf16( %va, @vp_round_nxv8bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv8bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t @@ -209,7 +221,7 @@ define @vp_round_nxv8bf16_unmasked( % ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv8bf16( %va, splat (i1 true), i32 %evl) @@ -221,25 +233,27 @@ declare @llvm.vp.round.nxv16bf16(, define @vp_round_nxv16bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv16bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, 
ma ; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv16bf16( %va, %m, i32 %evl) ret %v @@ -248,12 +262,12 @@ define @vp_round_nxv16bf16( %va, @vp_round_nxv16bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv16bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t @@ -261,7 +275,7 @@ define @vp_round_nxv16bf16_unmasked( @llvm.vp.round.nxv16bf16( %va, splat (i1 true), i32 %evl) @@ -279,59 +293,64 @@ define @vp_round_nxv32bf16( %va, @vp_round_nxv32bf16( %va, @vp_round_nxv32bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv32bf16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vmset.m v24 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: lui a3, 307200 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 ; CHECK-NEXT: sub a3, a0, a1 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v12, v24, a2 +; CHECK-NEXT: vslidedown.vx v16, v16, a2 ; CHECK-NEXT: sltu a2, a0, a3 +; CHECK-NEXT: vmv1r.v v17, v16 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; 
CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 4 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB11_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB11_2: -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a0, 4 @@ -390,8 +422,14 @@ define @vp_round_nxv32bf16_unmasked( @llvm.vp.round.nxv32bf16( %va, splat (i1 true), i32 %evl) ret %v @@ -418,23 +456,27 @@ define @vp_round_nxv1f16( %va, @llvm.vp.round.nxv1f16( %va, %m, i32 %evl) ret %v @@ -458,12 +500,12 @@ define @vp_round_nxv1f16_unmasked( %va, i ; ; ZVFHMIN-LABEL: vp_round_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -471,7 +513,7 @@ define @vp_round_nxv1f16_unmasked( %va, i ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv1f16( %va, splat (i1 true), i32 %evl) @@ -500,23 +542,27 @@ define @vp_round_nxv2f16( %va, @llvm.vp.round.nxv2f16( %va, %m, i32 %evl) ret %v @@ -540,12 +586,12 @@ define @vp_round_nxv2f16_unmasked( %va, i ; ; ZVFHMIN-LABEL: vp_round_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -553,7 +599,7 @@ define @vp_round_nxv2f16_unmasked( %va, i ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli 
a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv2f16( %va, splat (i1 true), i32 %evl) @@ -582,25 +628,27 @@ define @vp_round_nxv4f16( %va, @llvm.vp.round.nxv4f16( %va, %m, i32 %evl) ret %v @@ -624,12 +672,12 @@ define @vp_round_nxv4f16_unmasked( %va, i ; ; ZVFHMIN-LABEL: vp_round_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t @@ -637,7 +685,7 @@ define @vp_round_nxv4f16_unmasked( %va, i ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv4f16( %va, splat (i1 true), i32 %evl) @@ -668,25 +716,27 @@ define @vp_round_nxv8f16( %va, @llvm.vp.round.nxv8f16( %va, %m, i32 %evl) ret %v @@ -710,12 +760,12 @@ define @vp_round_nxv8f16_unmasked( %va, i ; ; ZVFHMIN-LABEL: vp_round_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t @@ -723,7 +773,7 @@ define @vp_round_nxv8f16_unmasked( %va, i ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv8f16( %va, splat (i1 true), i32 %evl) @@ -754,25 +804,27 @@ define @vp_round_nxv16f16( %va, @llvm.vp.round.nxv16f16( %va, %m, i32 %evl) ret %v @@ -796,12 +848,12 @@ define @vp_round_nxv16f16_unmasked( %va ; ; ZVFHMIN-LABEL: vp_round_nxv16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t @@ -809,7 +861,7 @@ define @vp_round_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; 
ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv16f16( %va, splat (i1 true), i32 %evl) @@ -846,59 +898,64 @@ define @vp_round_nxv32f16( %va, @vp_round_nxv32f16_unmasked( %va ; ; ZVFHMIN-LABEL: vp_round_nxv32f16_unmasked: ; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: lui a3, 307200 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 ; ZVFHMIN-NEXT: sub a3, a0, a1 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v12, v24, a2 +; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 +; ZVFHMIN-NEXT: vmv1r.v v17, v16 ; ZVFHMIN-NEXT: addi a2, a2, -1 ; ZVFHMIN-NEXT: and a2, a2, a3 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: addi a3, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v12, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a2, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: fsrm a2 -; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB23_2: -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 4 @@ -972,8 +1042,14 @@ define @vp_round_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, 
sp, a0 +; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.round.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll index f1510a8c53181..df5844277c997 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll @@ -17,23 +17,27 @@ declare @llvm.vp.roundeven.nxv1bf16(, define @vp_roundeven_nxv1bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv1bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vfabs.v v8, v9, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v11, v10, v0.t +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv1bf16( %va, %m, i32 %evl) ret %v @@ -42,12 +46,12 @@ define @vp_roundeven_nxv1bf16( %va, < define @vp_roundeven_nxv1bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv1bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -55,7 +59,7 @@ define @vp_roundeven_nxv1bf16_unmasked( @llvm.vp.roundeven.nxv1bf16( %va, splat (i1 true), i32 %evl) @@ -67,23 +71,27 @@ declare @llvm.vp.roundeven.nxv2bf16(, define @vp_roundeven_nxv2bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv2bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfabs.v v8, v9, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v11, v10, v0.t +; CHECK-NEXT: fmv.w.x fa5, a0 ; 
CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv2bf16( %va, %m, i32 %evl) ret %v @@ -92,12 +100,12 @@ define @vp_roundeven_nxv2bf16( %va, < define @vp_roundeven_nxv2bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv2bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -105,7 +113,7 @@ define @vp_roundeven_nxv2bf16_unmasked( @llvm.vp.roundeven.nxv2bf16( %va, splat (i1 true), i32 %evl) @@ -117,25 +125,27 @@ declare @llvm.vp.roundeven.nxv4bf16(, define @vp_roundeven_nxv4bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv4bf16( %va, %m, i32 %evl) ret %v @@ -144,12 +154,12 @@ define @vp_roundeven_nxv4bf16( %va, < define @vp_roundeven_nxv4bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv4bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: lui a0, 307200 +; 
CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t @@ -157,7 +167,7 @@ define @vp_roundeven_nxv4bf16_unmasked( @llvm.vp.roundeven.nxv4bf16( %va, splat (i1 true), i32 %evl) @@ -169,25 +179,27 @@ declare @llvm.vp.roundeven.nxv8bf16(, define @vp_roundeven_nxv8bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v12, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv8bf16( %va, %m, i32 %evl) ret %v @@ -196,12 +208,12 @@ define @vp_roundeven_nxv8bf16( %va, < define @vp_roundeven_nxv8bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv8bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t @@ -209,7 +221,7 @@ define @vp_roundeven_nxv8bf16_unmasked( @llvm.vp.roundeven.nxv8bf16( %va, splat (i1 true), i32 %evl) @@ -221,25 +233,27 @@ declare @llvm.vp.roundeven.nxv16bf16( @vp_roundeven_nxv16bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv16bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, 
ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv16bf16( %va, %m, i32 %evl) ret %v @@ -248,12 +262,12 @@ define @vp_roundeven_nxv16bf16( %va define @vp_roundeven_nxv16bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv16bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t @@ -261,7 +275,7 @@ define @vp_roundeven_nxv16bf16_unmasked( @llvm.vp.roundeven.nxv16bf16( %va, splat (i1 true), i32 %evl) @@ -279,59 +293,64 @@ define @vp_roundeven_nxv32bf16( %va ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 ; CHECK-NEXT: lui a3, 307200 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 ; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v12, v0, a2 +; CHECK-NEXT: vslidedown.vx v17, v0, a2 ; CHECK-NEXT: sltu a2, a0, a3 +; CHECK-NEXT: vmv1r.v v18, v17 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v18, v8, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmv1r.v v0, v18 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: 
vfncvtbf16.f.f.w v12, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB10_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB10_2: -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t +; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 @@ -346,42 +365,55 @@ define @vp_roundeven_nxv32bf16( %va define @vp_roundeven_nxv32bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv32bf16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vmset.m v24 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: lui a3, 307200 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 ; CHECK-NEXT: sub a3, a0, a1 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v12, v24, a2 +; CHECK-NEXT: vslidedown.vx v16, v16, a2 ; CHECK-NEXT: sltu a2, a0, a3 +; CHECK-NEXT: vmv1r.v v17, v16 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, 
v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB11_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB11_2: -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a0, 0 @@ -390,8 +422,14 @@ define @vp_roundeven_nxv32bf16_unmasked( @llvm.vp.roundeven.nxv32bf16( %va, splat (i1 true), i32 %evl) ret %v @@ -418,23 +456,27 @@ define @vp_roundeven_nxv1f16( %va, @llvm.vp.roundeven.nxv1f16( %va, %m, i32 %evl) ret %v @@ -458,12 +500,12 @@ define @vp_roundeven_nxv1f16_unmasked( %v ; ; ZVFHMIN-LABEL: vp_roundeven_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -471,7 +513,7 @@ define @vp_roundeven_nxv1f16_unmasked( %v ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv1f16( %va, splat (i1 true), i32 %evl) @@ -500,23 +542,27 @@ define @vp_roundeven_nxv2f16( %va, @llvm.vp.roundeven.nxv2f16( %va, %m, i32 %evl) ret %v @@ -540,12 +586,12 @@ define @vp_roundeven_nxv2f16_unmasked( %v ; ; ZVFHMIN-LABEL: vp_roundeven_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -553,7 +599,7 @@ define @vp_roundeven_nxv2f16_unmasked( %v ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv2f16( %va, splat (i1 true), i32 %evl) @@ -582,25 +628,27 @@ define @vp_roundeven_nxv4f16( %va, @llvm.vp.roundeven.nxv4f16( %va, %m, i32 %evl) ret %v @@ -624,12 +672,12 @@ define @vp_roundeven_nxv4f16_unmasked( %v ; ; ZVFHMIN-LABEL: vp_roundeven_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; 
ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t @@ -637,7 +685,7 @@ define @vp_roundeven_nxv4f16_unmasked( %v ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv4f16( %va, splat (i1 true), i32 %evl) @@ -668,25 +716,27 @@ define @vp_roundeven_nxv8f16( %va, @llvm.vp.roundeven.nxv8f16( %va, %m, i32 %evl) ret %v @@ -710,12 +760,12 @@ define @vp_roundeven_nxv8f16_unmasked( %v ; ; ZVFHMIN-LABEL: vp_roundeven_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t @@ -723,7 +773,7 @@ define @vp_roundeven_nxv8f16_unmasked( %v ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv8f16( %va, splat (i1 true), i32 %evl) @@ -754,25 +804,27 @@ define @vp_roundeven_nxv16f16( %va, @llvm.vp.roundeven.nxv16f16( %va, %m, i32 %evl) ret %v @@ -796,12 +848,12 @@ define @vp_roundeven_nxv16f16_unmasked( ; ; ZVFHMIN-LABEL: vp_roundeven_nxv16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t @@ -809,7 +861,7 @@ define @vp_roundeven_nxv16f16_unmasked( ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv16f16( %va, splat (i1 true), i32 %evl) @@ -846,59 +898,64 @@ define @vp_roundeven_nxv32f16( %va, @vp_roundeven_nxv32f16_unmasked( ; ; ZVFHMIN-LABEL: vp_roundeven_nxv32f16_unmasked: ; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 
0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: lui a3, 307200 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 ; ZVFHMIN-NEXT: sub a3, a0, a1 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v12, v24, a2 +; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 +; ZVFHMIN-NEXT: vmv1r.v v17, v16 ; ZVFHMIN-NEXT: addi a2, a2, -1 ; ZVFHMIN-NEXT: and a2, a2, a3 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: addi a3, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v12, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a2, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: fsrm a2 -; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB23_2: -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 0 @@ -972,8 +1042,14 @@ define @vp_roundeven_nxv32f16_unmasked( ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundeven.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll index dd7db58ccdf34..1300d8cd64ebb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -17,23 +17,27 @@ declare @llvm.vp.roundtozero.nxv1bf16( 
@vp_roundtozero_nxv1bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv1bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vfabs.v v8, v9, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfabs.v v11, v10, v0.t +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv1bf16( %va, %m, i32 %evl) ret %v @@ -42,12 +46,12 @@ define @vp_roundtozero_nxv1bf16( %va, define @vp_roundtozero_nxv1bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv1bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -55,7 +59,7 @@ define @vp_roundtozero_nxv1bf16_unmasked( @llvm.vp.roundtozero.nxv1bf16( %va, splat (i1 true), i32 %evl) @@ -67,23 +71,27 @@ declare @llvm.vp.roundtozero.nxv2bf16( @vp_roundtozero_nxv2bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv2bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfabs.v v8, v9, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfabs.v v11, v10, v0.t +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t +; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v9, v8, 
v9, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv2bf16( %va, %m, i32 %evl) ret %v @@ -92,12 +100,12 @@ define @vp_roundtozero_nxv2bf16( %va, define @vp_roundtozero_nxv2bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv2bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -105,7 +113,7 @@ define @vp_roundtozero_nxv2bf16_unmasked( @llvm.vp.roundtozero.nxv2bf16( %va, splat (i1 true), i32 %evl) @@ -117,25 +125,27 @@ declare @llvm.vp.roundtozero.nxv4bf16( @vp_roundtozero_nxv4bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv4bf16( %va, %m, i32 %evl) ret %v @@ -144,12 +154,12 @@ define @vp_roundtozero_nxv4bf16( %va, define @vp_roundtozero_nxv4bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv4bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t @@ -157,7 +167,7 @@ define @vp_roundtozero_nxv4bf16_unmasked( @llvm.vp.roundtozero.nxv4bf16( %va, splat (i1 true), i32 %evl) @@ -169,25 +179,27 @@ declare @llvm.vp.roundtozero.nxv8bf16( @vp_roundtozero_nxv8bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: 
vp_roundtozero_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v12, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv8bf16( %va, %m, i32 %evl) ret %v @@ -196,12 +208,12 @@ define @vp_roundtozero_nxv8bf16( %va, define @vp_roundtozero_nxv8bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv8bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t @@ -209,7 +221,7 @@ define @vp_roundtozero_nxv8bf16_unmasked( @llvm.vp.roundtozero.nxv8bf16( %va, splat (i1 true), i32 %evl) @@ -221,25 +233,27 @@ declare @llvm.vp.roundtozero.nxv16bf16( @vp_roundtozero_nxv16bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv16bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t ; CHECK-NEXT: ret %v = call 
@llvm.vp.roundtozero.nxv16bf16( %va, %m, i32 %evl) ret %v @@ -248,12 +262,12 @@ define @vp_roundtozero_nxv16bf16( % define @vp_roundtozero_nxv16bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv16bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16 -; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t @@ -261,7 +275,7 @@ define @vp_roundtozero_nxv16bf16_unmasked( @llvm.vp.roundtozero.nxv16bf16( %va, splat (i1 true), i32 %evl) @@ -279,59 +293,64 @@ define @vp_roundtozero_nxv32bf16( % ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 ; CHECK-NEXT: lui a3, 307200 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 ; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v12, v0, a2 +; CHECK-NEXT: vslidedown.vx v17, v0, a2 ; CHECK-NEXT: sltu a2, a0, a3 +; CHECK-NEXT: vmv1r.v v18, v17 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v18, v8, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 1 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmv1r.v v0, v18 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB10_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB10_2: -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, 
v0.t +; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 @@ -346,42 +365,55 @@ define @vp_roundtozero_nxv32bf16( % define @vp_roundtozero_nxv32bf16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv32bf16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vmset.m v24 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vmset.m v16 ; CHECK-NEXT: lui a3, 307200 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 ; CHECK-NEXT: sub a3, a0, a1 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v12, v24, a2 +; CHECK-NEXT: vslidedown.vx v16, v16, a2 ; CHECK-NEXT: sltu a2, a0, a3 +; CHECK-NEXT: vmv1r.v v17, v16 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 1 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB11_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB11_2: -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: 
vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a0, 1 @@ -390,8 +422,14 @@ define @vp_roundtozero_nxv32bf16_unmasked( @llvm.vp.roundtozero.nxv32bf16( %va, splat (i1 true), i32 %evl) ret %v @@ -418,23 +456,27 @@ define @vp_roundtozero_nxv1f16( %va, @llvm.vp.roundtozero.nxv1f16( %va, %m, i32 %evl) ret %v @@ -458,12 +500,12 @@ define @vp_roundtozero_nxv1f16_unmasked( ; ; ZVFHMIN-LABEL: vp_roundtozero_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -471,7 +513,7 @@ define @vp_roundtozero_nxv1f16_unmasked( ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv1f16( %va, splat (i1 true), i32 %evl) @@ -500,23 +542,27 @@ define @vp_roundtozero_nxv2f16( %va, @llvm.vp.roundtozero.nxv2f16( %va, %m, i32 %evl) ret %v @@ -540,12 +586,12 @@ define @vp_roundtozero_nxv2f16_unmasked( ; ; ZVFHMIN-LABEL: vp_roundtozero_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t @@ -553,7 +599,7 @@ define @vp_roundtozero_nxv2f16_unmasked( ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv2f16( %va, splat (i1 true), i32 %evl) @@ -582,25 +628,27 @@ define @vp_roundtozero_nxv4f16( %va, @llvm.vp.roundtozero.nxv4f16( %va, %m, i32 %evl) ret %v @@ -624,12 +672,12 @@ define @vp_roundtozero_nxv4f16_unmasked( ; ; ZVFHMIN-LABEL: vp_roundtozero_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t @@ -637,7 +685,7 @@ define 
@vp_roundtozero_nxv4f16_unmasked( ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv4f16( %va, splat (i1 true), i32 %evl) @@ -668,25 +716,27 @@ define @vp_roundtozero_nxv8f16( %va, @llvm.vp.roundtozero.nxv8f16( %va, %m, i32 %evl) ret %v @@ -710,12 +760,12 @@ define @vp_roundtozero_nxv8f16_unmasked( ; ; ZVFHMIN-LABEL: vp_roundtozero_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t @@ -723,7 +773,7 @@ define @vp_roundtozero_nxv8f16_unmasked( ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv8f16( %va, splat (i1 true), i32 %evl) @@ -754,25 +804,27 @@ define @vp_roundtozero_nxv16f16( %va, < ; ; ZVFHMIN-LABEL: vp_roundtozero_nxv16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v12, v0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v12, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v24, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv16f16( %va, %m, i32 %evl) ret %v @@ -796,12 +848,12 @@ define @vp_roundtozero_nxv16f16_unmasked( @vp_roundtozero_nxv16f16_unmasked( @llvm.vp.roundtozero.nxv16f16( %va, splat (i1 true), i32 %evl) @@ -846,59 +898,64 @@ define @vp_roundtozero_nxv32f16( %va, < ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, 
zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: lui a3, 307200 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 ; ZVFHMIN-NEXT: sub a3, a0, a1 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v12, v0, a2 +; ZVFHMIN-NEXT: vslidedown.vx v17, v0, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 +; ZVFHMIN-NEXT: vmv1r.v v18, v17 ; ZVFHMIN-NEXT: addi a2, a2, -1 ; ZVFHMIN-NEXT: and a2, a2, a3 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: addi a3, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v18, v8, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a2, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmv1r.v v0, v18 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t -; ZVFHMIN-NEXT: addi a3, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: fsrm a2 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB22_2: -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: vmv1r.v v8, v7 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v7, v16, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add sp, sp, a0 @@ -928,42 +985,55 @@ define @vp_roundtozero_nxv32f16_unmasked( @vp_roundtozero_nxv32f16_unmasked( @llvm.vp.roundtozero.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll index 4a8ee54d87eba..9e78bbdc4f441 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -18,12 +18,12 @@ define @vfadd_vv_nxv1bf16( %va, @llvm.vp.fadd.nxv1bf16( %va, %b, %m, i32 %evl) ret %v @@ -37,7 +37,7 @@ define @vfadd_vv_nxv1bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfadd.vv v9, v9, v10 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.fadd.nxv1bf16( %va, %b, splat (i1 true), i32 %evl) @@ -50,12 +50,12 @@ define @vfadd_vf_nxv1bf16( %va, bfloa ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfadd.vv v9, v10, v8, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -69,12 +69,12 @@ define @vfadd_vf_nxv1bf16_commute( %v ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfadd.vv v9, v8, v10, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -92,7 +92,7 @@ define @vfadd_vf_nxv1bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfadd.vv v9, v10, v8 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 @@ -111,7 +111,7 @@ define @vfadd_vf_nxv1bf16_unmasked_commute( poison, bfloat %b, i32 0 @@ -126,12 +126,12 @@ define @vfadd_vv_nxv2bf16( %va, @llvm.vp.fadd.nxv2bf16( %va, %b, %m, i32 %evl) ret %v @@ -145,7 +145,7 @@ define @vfadd_vv_nxv2bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfadd.vv v9, v9, v10 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.fadd.nxv2bf16( %va, %b, splat (i1 true), i32 %evl) @@ -158,12 +158,12 @@ define @vfadd_vf_nxv2bf16( %va, bfloa ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9, v0.t ; 
CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfadd.vv v9, v10, v8, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -181,7 +181,7 @@ define @vfadd_vf_nxv2bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfadd.vv v9, v10, v8 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 @@ -196,12 +196,12 @@ define @vfadd_vv_nxv4bf16( %va, @llvm.vp.fadd.nxv4bf16( %va, %b, %m, i32 %evl) ret %v @@ -215,7 +215,7 @@ define @vfadd_vv_nxv4bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfadd.vv v10, v12, v10 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.fadd.nxv4bf16( %va, %b, splat (i1 true), i32 %evl) @@ -228,12 +228,12 @@ define @vfadd_vf_nxv4bf16( %va, bfloa ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfadd.vv v10, v10, v12, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -251,7 +251,7 @@ define @vfadd_vf_nxv4bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfadd.vv v10, v10, v12 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 @@ -266,12 +266,12 @@ define @vfadd_vv_nxv8bf16( %va, @llvm.vp.fadd.nxv8bf16( %va, %b, %m, i32 %evl) ret %v @@ -285,7 +285,7 @@ define @vfadd_vv_nxv8bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfadd.vv v12, v16, v12 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vp.fadd.nxv8bf16( %va, %b, splat (i1 true), i32 %evl) @@ -298,12 +298,12 @@ define @vfadd_vf_nxv8bf16( %va, bfloa ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vmv.v.x v10, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfadd.vv v12, v12, v16, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: 
vfncvtbf16.f.f.w v8, v12, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -321,7 +321,7 @@ define @vfadd_vf_nxv8bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfadd.vv v12, v12, v16 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 @@ -336,12 +336,12 @@ define @vfadd_vv_nxv16bf16( %va, @llvm.vp.fadd.nxv16bf16( %va, %b, %m, i32 %evl) ret %v @@ -355,7 +355,7 @@ define @vfadd_vv_nxv16bf16_unmasked( @llvm.vp.fadd.nxv16bf16( %va, %b, splat (i1 true), i32 %evl) @@ -368,12 +368,12 @@ define @vfadd_vf_nxv16bf16( %va, bf ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmv.v.x v12, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfadd.vv v16, v16, v24, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -391,7 +391,7 @@ define @vfadd_vf_nxv16bf16_unmasked( poison, bfloat %b, i32 0 @@ -409,10 +409,18 @@ define @vfadd_vv_nxv32bf16( %va, @vfadd_vv_nxv32bf16( %va, @vfadd_vv_nxv32bf16_unmasked( @vfadd_vv_nxv32bf16_unmasked( @vfadd_vf_nxv32bf16( %va, bf ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a2, a1, 4 -; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv8r.v v24, v8 +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a3, a1, 3 -; CHECK-NEXT: add a1, a3, a1 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 3 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sltu a2, a0, a3 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 ; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v 
v8, v28 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a3, a2, 3 -; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfadd.vv v16, v8, v16, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: vfadd.vv v24, v8, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB24_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB24_2: -; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24, v0.t +; CHECK-NEXT: vmv8r.v v24, v16 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfadd.vv v16, v16, v24, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vfadd.vv v24, v16, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -601,19 +637,14 @@ define @vfadd_vf_nxv32bf16_unmasked( @vfadd_vf_nxv32bf16_unmasked( @vfadd_vv_nxv1f16( %va, @llvm.vp.fadd.nxv1f16( %va, %b, %m, i32 %evl) ret %v @@ -704,7 +724,7 @@ define @vfadd_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fadd.nxv1f16( %va, %b, splat (i1 true), i32 %evl) @@ -723,12 +743,12 @@ define @vfadd_vf_nxv1f16( %va, half %b, < ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v9, v10, v8, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -748,12 +768,12 @@ define @vfadd_vf_nxv1f16_commute( %va, ha ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v9, v8, v10, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -777,7 +797,7 @@ define @vfadd_vf_nxv1f16_unmasked( %va, h ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v9, v10, v8 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -802,7 +822,7 @@ define @vfadd_vf_nxv1f16_unmasked_commute( poison, half %b, i32 0 @@ -823,12 +843,12 @@ define @vfadd_vv_nxv2f16( %va, @llvm.vp.fadd.nxv2f16( %va, %b, %m, i32 %evl) ret %v @@ -848,7 +868,7 @@ define @vfadd_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fadd.nxv2f16( %va, %b, splat (i1 true), i32 %evl) @@ -867,12 +887,12 @@ define @vfadd_vf_nxv2f16( %va, half %b, < ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v9, v10, v8, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -896,7 +916,7 @@ define @vfadd_vf_nxv2f16_unmasked( %va, h ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v9, v10, v8 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -917,12 +937,12 @@ define @vfadd_vv_nxv4f16( %va, @llvm.vp.fadd.nxv4f16( %va, %b, %m, i32 %evl) ret %v @@ -942,7 +962,7 @@ define @vfadd_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: 
vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v10, v12, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fadd.nxv4f16( %va, %b, splat (i1 true), i32 %evl) @@ -961,12 +981,12 @@ define @vfadd_vf_nxv4f16( %va, half %b, < ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v10, v10, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -990,7 +1010,7 @@ define @vfadd_vf_nxv4f16_unmasked( %va, h ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v10, v10, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -1011,12 +1031,12 @@ define @vfadd_vv_nxv8f16( %va, @llvm.vp.fadd.nxv8f16( %va, %b, %m, i32 %evl) ret %v @@ -1036,7 +1056,7 @@ define @vfadd_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v12, v16, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fadd.nxv8f16( %va, %b, splat (i1 true), i32 %evl) @@ -1055,12 +1075,12 @@ define @vfadd_vf_nxv8f16( %va, half %b, < ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v12, v12, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1084,7 +1104,7 @@ define @vfadd_vf_nxv8f16_unmasked( %va, h ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v12, v12, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -1105,12 +1125,12 @@ define @vfadd_vv_nxv16f16( %va, @llvm.vp.fadd.nxv16f16( %va, %b, %m, i32 %evl) ret %v @@ -1130,7 +1150,7 @@ define @vfadd_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v16, v24, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; 
ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fadd.nxv16f16( %va, %b, splat (i1 true), i32 %evl) @@ -1149,12 +1169,12 @@ define @vfadd_vf_nxv16f16( %va, half %b ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v12, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1178,7 +1198,7 @@ define @vfadd_vf_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -1202,10 +1222,18 @@ define @vfadd_vv_nxv32f16( %va, @vfadd_vv_nxv32f16( %va, @vfadd_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a3, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB49_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 @@ -1294,7 +1350,7 @@ define @vfadd_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v16, v24, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 @@ -1319,76 +1375,76 @@ define @vfadd_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a2, a1, 4 -; ZVFHMIN-NEXT: add a1, a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv8r.v v24, v8 +; ZVFHMIN-NEXT: vmv1r.v v7, v0 +; ZVFHMIN-NEXT: vmv8r.v v16, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: vmv.v.x v16, a1 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a3, a1, 3 -; ZVFHMIN-NEXT: add a1, a3, a1 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; 
ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv.v.x v8, a1 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: sub a3, a0, a1 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: addi a2, a2, -1 ; ZVFHMIN-NEXT: and a2, a2, a3 -; ZVFHMIN-NEXT: addi a3, sp, 16 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: slli a3, a3, 3 +; ZVFHMIN-NEXT: add a3, sp, a3 +; ZVFHMIN-NEXT: addi a3, a3, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vmv4r.v v8, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a3, a2, 3 -; ZVFHMIN-NEXT: add a2, a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfadd.vv v16, v8, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfadd.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB50_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB50_2: -; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a1, a0, 3 -; ZVFHMIN-NEXT: add a0, a1, a0 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t +; ZVFHMIN-NEXT: vmv8r.v v24, v16 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vfadd.vv v24, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a1, a0, 4 -; ZVFHMIN-NEXT: add a0, a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; 
ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 ; ZVFHMIN-NEXT: addi sp, sp, 16 @@ -1412,19 +1468,14 @@ define @vfadd_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vmv.v.x v16, a1 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -1433,41 +1484,30 @@ define @vfadd_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: addi a2, a2, -1 ; ZVFHMIN-NEXT: and a2, a2, a3 -; ZVFHMIN-NEXT: vmv8r.v v16, v8 ; ZVFHMIN-NEXT: addi a3, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfadd.vv v16, v8, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB51_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB51_2: -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 ; ZVFHMIN-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll index 2997fad370e46..532629ef7a8a8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll @@ -18,12 +18,12 @@ define @vfdiv_vv_nxv1bf16( %va, @llvm.vp.fdiv.nxv1bf16( %va, %b, %m, i32 %evl) ret %v @@ -37,7 +37,7 @@ define 
@vfdiv_vv_nxv1bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfdiv.vv v9, v9, v10 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.fdiv.nxv1bf16( %va, %b, splat (i1 true), i32 %evl) @@ -50,12 +50,12 @@ define @vfdiv_vf_nxv1bf16( %va, bfloa ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfdiv.vv v9, v10, v8, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -73,7 +73,7 @@ define @vfdiv_vf_nxv1bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfdiv.vv v9, v10, v8 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 @@ -88,12 +88,12 @@ define @vfdiv_vv_nxv2bf16( %va, @llvm.vp.fdiv.nxv2bf16( %va, %b, %m, i32 %evl) ret %v @@ -107,7 +107,7 @@ define @vfdiv_vv_nxv2bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfdiv.vv v9, v9, v10 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.fdiv.nxv2bf16( %va, %b, splat (i1 true), i32 %evl) @@ -120,12 +120,12 @@ define @vfdiv_vf_nxv2bf16( %va, bfloa ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfdiv.vv v9, v10, v8, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -143,7 +143,7 @@ define @vfdiv_vf_nxv2bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfdiv.vv v9, v10, v8 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 @@ -158,12 +158,12 @@ define @vfdiv_vv_nxv4bf16( %va, @llvm.vp.fdiv.nxv4bf16( %va, %b, %m, i32 %evl) ret %v @@ -177,7 +177,7 @@ define @vfdiv_vv_nxv4bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfdiv.vv v10, v12, v10 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; 
CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.fdiv.nxv4bf16( %va, %b, splat (i1 true), i32 %evl) @@ -190,12 +190,12 @@ define @vfdiv_vf_nxv4bf16( %va, bfloa ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfdiv.vv v10, v10, v12, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -213,7 +213,7 @@ define @vfdiv_vf_nxv4bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfdiv.vv v10, v10, v12 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 @@ -228,12 +228,12 @@ define @vfdiv_vv_nxv8bf16( %va, @llvm.vp.fdiv.nxv8bf16( %va, %b, %m, i32 %evl) ret %v @@ -247,7 +247,7 @@ define @vfdiv_vv_nxv8bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfdiv.vv v12, v16, v12 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vp.fdiv.nxv8bf16( %va, %b, splat (i1 true), i32 %evl) @@ -260,12 +260,12 @@ define @vfdiv_vf_nxv8bf16( %va, bfloa ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vmv.v.x v10, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfdiv.vv v12, v12, v16, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -283,7 +283,7 @@ define @vfdiv_vf_nxv8bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfdiv.vv v12, v12, v16 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 @@ -298,12 +298,12 @@ define @vfdiv_vv_nxv16bf16( %va, @llvm.vp.fdiv.nxv16bf16( %va, %b, %m, i32 %evl) ret %v @@ -317,7 +317,7 @@ define @vfdiv_vv_nxv16bf16_unmasked( @llvm.vp.fdiv.nxv16bf16( %va, %b, splat (i1 true), i32 %evl) @@ -330,12 +330,12 @@ define @vfdiv_vf_nxv16bf16( %va, bf ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmv.v.x v12, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: 
vfdiv.vv v16, v16, v24, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -353,7 +353,7 @@ define @vfdiv_vf_nxv16bf16_unmasked( poison, bfloat %b, i32 0 @@ -371,10 +371,18 @@ define @vfdiv_vv_nxv32bf16( %va, @vfdiv_vv_nxv32bf16( %va, @vfdiv_vv_nxv32bf16_unmasked( @vfdiv_vv_nxv32bf16_unmasked( @vfdiv_vf_nxv32bf16( %va, bf ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a2, a1, 4 -; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv8r.v v24, v8 +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a3, a1, 3 -; CHECK-NEXT: add a1, a3, a1 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 3 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sltu a2, a0, a3 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 ; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a3, a2, 3 -; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfdiv.vv v16, v8, v16, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: vfdiv.vv v24, v8, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB22_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB22_2: -; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e16, 
m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24, v0.t +; CHECK-NEXT: vmv8r.v v24, v16 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfdiv.vv v16, v16, v24, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vfdiv.vv v24, v16, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -563,19 +599,14 @@ define @vfdiv_vf_nxv32bf16_unmasked( @vfdiv_vf_nxv32bf16_unmasked( @vfdiv_vv_nxv1f16( %va, @llvm.vp.fdiv.nxv1f16( %va, %b, %m, i32 %evl) ret %v @@ -666,7 +686,7 @@ define @vfdiv_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fdiv.nxv1f16( %va, %b, splat (i1 true), i32 %evl) @@ -685,12 +705,12 @@ define @vfdiv_vf_nxv1f16( %va, half %b, < ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v9, v10, v8, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -714,7 +734,7 @@ define @vfdiv_vf_nxv1f16_unmasked( %va, h ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v9, v10, v8 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -735,12 +755,12 @@ define @vfdiv_vv_nxv2f16( %va, @llvm.vp.fdiv.nxv2f16( %va, %b, %m, i32 %evl) ret %v @@ -760,7 +780,7 @@ define @vfdiv_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; 
ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fdiv.nxv2f16( %va, %b, splat (i1 true), i32 %evl) @@ -779,12 +799,12 @@ define @vfdiv_vf_nxv2f16( %va, half %b, < ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v9, v10, v8, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -808,7 +828,7 @@ define @vfdiv_vf_nxv2f16_unmasked( %va, h ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v9, v10, v8 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -829,12 +849,12 @@ define @vfdiv_vv_nxv4f16( %va, @llvm.vp.fdiv.nxv4f16( %va, %b, %m, i32 %evl) ret %v @@ -854,7 +874,7 @@ define @vfdiv_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v10, v12, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fdiv.nxv4f16( %va, %b, splat (i1 true), i32 %evl) @@ -873,12 +893,12 @@ define @vfdiv_vf_nxv4f16( %va, half %b, < ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -902,7 +922,7 @@ define @vfdiv_vf_nxv4f16_unmasked( %va, h ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v10, v10, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -923,12 +943,12 @@ define @vfdiv_vv_nxv8f16( %va, @llvm.vp.fdiv.nxv8f16( %va, %b, %m, i32 %evl) ret %v @@ -948,7 +968,7 @@ define @vfdiv_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v12, v16, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fdiv.nxv8f16( %va, %b, splat (i1 true), i32 %evl) @@ -967,12 +987,12 @@ define 
@vfdiv_vf_nxv8f16( %va, half %b, < ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -996,7 +1016,7 @@ define @vfdiv_vf_nxv8f16_unmasked( %va, h ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v12, v12, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -1017,12 +1037,12 @@ define @vfdiv_vv_nxv16f16( %va, @llvm.vp.fdiv.nxv16f16( %va, %b, %m, i32 %evl) ret %v @@ -1042,7 +1062,7 @@ define @vfdiv_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v16, v24, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fdiv.nxv16f16( %va, %b, splat (i1 true), i32 %evl) @@ -1061,12 +1081,12 @@ define @vfdiv_vf_nxv16f16( %va, half %b ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v12, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1090,7 +1110,7 @@ define @vfdiv_vf_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -1114,10 +1134,18 @@ define @vfdiv_vv_nxv32f16( %va, @vfdiv_vv_nxv32f16( %va, @vfdiv_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a3, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB45_2 
; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 @@ -1206,7 +1262,7 @@ define @vfdiv_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v16, v24, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 @@ -1231,76 +1287,76 @@ define @vfdiv_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a2, a1, 4 -; ZVFHMIN-NEXT: add a1, a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv8r.v v24, v8 +; ZVFHMIN-NEXT: vmv1r.v v7, v0 +; ZVFHMIN-NEXT: vmv8r.v v16, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: vmv.v.x v16, a1 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a3, a1, 3 -; ZVFHMIN-NEXT: add a1, a3, a1 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv.v.x v8, a1 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: sub a3, a0, a1 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: addi a2, a2, -1 ; ZVFHMIN-NEXT: and a2, a2, a3 -; ZVFHMIN-NEXT: addi a3, sp, 16 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: slli a3, a3, 3 +; ZVFHMIN-NEXT: add a3, sp, a3 +; ZVFHMIN-NEXT: addi a3, a3, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vmv4r.v v8, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a3, a2, 3 -; ZVFHMIN-NEXT: add a2, a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfdiv.vv v16, v8, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfdiv.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB46_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB46_2: -; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size 
Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a1, a0, 3 -; ZVFHMIN-NEXT: add a0, a1, a0 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t +; ZVFHMIN-NEXT: vmv8r.v v24, v16 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vfdiv.vv v24, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a1, a0, 4 -; ZVFHMIN-NEXT: add a0, a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 ; ZVFHMIN-NEXT: addi sp, sp, 16 @@ -1324,19 +1380,14 @@ define @vfdiv_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vmv.v.x v16, a1 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -1345,41 +1396,30 @@ define @vfdiv_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: addi a2, a2, -1 ; ZVFHMIN-NEXT: and a2, a2, a3 -; ZVFHMIN-NEXT: vmv8r.v v16, v8 ; ZVFHMIN-NEXT: addi a3, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfdiv.vv v16, v8, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfdiv.vv v16, v16, 
v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB47_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB47_2: -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 ; ZVFHMIN-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll index ab67e9833c78a..5ee5d40d8313d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll @@ -17,14 +17,14 @@ declare @llvm.vp.fma.nxv1bf16(, @vfma_vv_nxv1bf16( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv1bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v10 -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v10, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfmadd.vv v12, v10, v11, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.fma.nxv1bf16( %va, %b, %c, %m, i32 %evl) ret %v @@ -33,13 +33,13 @@ define @vfma_vv_nxv1bf16( %va, @vfma_vv_nxv1bf16_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv1bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v10 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfmadd.vv v12, v10, v11 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vp.fma.nxv1bf16( %va, %b, %c, splat (i1 true), i32 %evl) @@ -50,15 +50,15 @@ define @vfma_vf_nxv1bf16( %va, bfloat ; CHECK-LABEL: vfma_vf_nxv1bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: fmv.x.h a1, fa0 -; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9, v0.t ; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; 
CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfmadd.vv v12, v11, v10, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -70,15 +70,15 @@ define @vfma_vf_nxv1bf16_commute( %va ; CHECK-LABEL: vfma_vf_nxv1bf16_commute: ; CHECK: # %bb.0: ; CHECK-NEXT: fmv.x.h a1, fa0 -; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9, v0.t ; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfmadd.vv v11, v8, v10, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v11 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v11, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -90,14 +90,14 @@ define @vfma_vf_nxv1bf16_unmasked( %v ; CHECK-LABEL: vfma_vf_nxv1bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: fmv.x.h a1, fa0 -; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 ; CHECK-NEXT: vmv.v.x v9, a1 ; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfmadd.vv v12, v11, v10 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 @@ -110,14 +110,14 @@ define @vfma_vf_nxv1bf16_unmasked_commute( poison, bfloat %b, i32 0 @@ -131,14 +131,14 @@ declare @llvm.vp.fma.nxv2bf16(, @vfma_vv_nxv2bf16( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv2bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v10 -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v10, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmadd.vv v12, v10, v11, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.fma.nxv2bf16( %va, %b, %c, %m, i32 %evl) ret %v @@ -147,13 +147,13 @@ define @vfma_vv_nxv2bf16( %va, @vfma_vv_nxv2bf16_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv2bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: 
vfwcvtbf16.f.f.v v11, v10 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmadd.vv v12, v10, v11 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vp.fma.nxv2bf16( %va, %b, %c, splat (i1 true), i32 %evl) @@ -164,15 +164,15 @@ define @vfma_vf_nxv2bf16( %va, bfloat ; CHECK-LABEL: vfma_vf_nxv2bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: fmv.x.h a1, fa0 -; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9, v0.t ; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmadd.vv v12, v11, v10, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -184,15 +184,15 @@ define @vfma_vf_nxv2bf16_commute( %va ; CHECK-LABEL: vfma_vf_nxv2bf16_commute: ; CHECK: # %bb.0: ; CHECK-NEXT: fmv.x.h a1, fa0 -; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9, v0.t ; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmadd.vv v11, v8, v10, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v11 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v11, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -204,14 +204,14 @@ define @vfma_vf_nxv2bf16_unmasked( %v ; CHECK-LABEL: vfma_vf_nxv2bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: fmv.x.h a1, fa0 -; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 ; CHECK-NEXT: vmv.v.x v9, a1 ; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmadd.vv v12, v11, v10 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 @@ -224,14 +224,14 @@ define @vfma_vf_nxv2bf16_unmasked_commute( poison, bfloat %b, i32 0 @@ -245,14 +245,14 @@ declare @llvm.vp.fma.nxv4bf16(, @vfma_vv_nxv4bf16( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 -; CHECK-NEXT: 
vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v14, v10, v12, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v14 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v14, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.fma.nxv4bf16( %va, %b, %c, %m, i32 %evl) ret %v @@ -261,13 +261,13 @@ define @vfma_vv_nxv4bf16( %va, @vfma_vv_nxv4bf16_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv4bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v14, v10, v12 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v14 ; CHECK-NEXT: ret %v = call @llvm.vp.fma.nxv4bf16( %va, %b, %c, splat (i1 true), i32 %evl) @@ -278,15 +278,15 @@ define @vfma_vf_nxv4bf16( %va, bfloat ; CHECK-LABEL: vfma_vf_nxv4bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: fmv.x.h a1, fa0 -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9, v0.t ; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v14, v12, v10, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v14 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v14, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -298,15 +298,15 @@ define @vfma_vf_nxv4bf16_commute( %va ; CHECK-LABEL: vfma_vf_nxv4bf16_commute: ; CHECK: # %bb.0: ; CHECK-NEXT: fmv.x.h a1, fa0 -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9, v0.t ; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v12, v14, v10, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -318,14 +318,14 @@ define @vfma_vf_nxv4bf16_unmasked( %v ; CHECK-LABEL: vfma_vf_nxv4bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: fmv.x.h a1, fa0 -; 
CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 ; CHECK-NEXT: vmv.v.x v9, a1 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v14, v12, v10 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v14 ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 @@ -338,14 +338,14 @@ define @vfma_vf_nxv4bf16_unmasked_commute( poison, bfloat %b, i32 0 @@ -359,14 +359,14 @@ declare @llvm.vp.fma.nxv8bf16(, @vfma_vv_nxv8bf16( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmadd.vv v20, v12, v16, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v20 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v20, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.fma.nxv8bf16( %va, %b, %c, %m, i32 %evl) ret %v @@ -375,13 +375,13 @@ define @vfma_vv_nxv8bf16( %va, @vfma_vv_nxv8bf16_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv8bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmadd.vv v20, v12, v16 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v20 ; CHECK-NEXT: ret %v = call @llvm.vp.fma.nxv8bf16( %va, %b, %c, splat (i1 true), i32 %evl) @@ -392,15 +392,15 @@ define @vfma_vf_nxv8bf16( %va, bfloat ; CHECK-LABEL: vfma_vf_nxv8bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: fmv.x.h a1, fa0 -; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10, v0.t ; CHECK-NEXT: vmv.v.x v10, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmadd.vv v20, v16, v12, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v20 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v20, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -412,15 +412,15 @@ define @vfma_vf_nxv8bf16_commute( %va ; CHECK-LABEL: vfma_vf_nxv8bf16_commute: ; CHECK: # %bb.0: ; CHECK-NEXT: fmv.x.h a1, fa0 -; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma 
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10, v0.t ; CHECK-NEXT: vmv.v.x v10, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v20, v12, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -432,14 +432,14 @@ define @vfma_vf_nxv8bf16_unmasked( %v ; CHECK-LABEL: vfma_vf_nxv8bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: fmv.x.h a1, fa0 -; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 ; CHECK-NEXT: vmv.v.x v10, a1 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmadd.vv v20, v16, v12 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v20 ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 @@ -452,14 +452,14 @@ define @vfma_vf_nxv8bf16_unmasked_commute( poison, bfloat %b, i32 0 @@ -479,17 +479,17 @@ define @vfma_vv_nxv16bf16( %va, @vfma_vv_nxv16bf16( %va, @vfma_vv_nxv16bf16_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv16bf16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v12 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v0, v16, v24 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0 ; CHECK-NEXT: ret %v = call @llvm.vp.fma.nxv16bf16( %va, %b, %c, splat (i1 true), i32 %evl) @@ -520,32 +520,18 @@ define @vfma_vv_nxv16bf16_unmasked( define @vfma_vf_nxv16bf16( %va, bfloat %b, %vc, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vf_nxv16bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: fmv.x.h a1, fa0 -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vmv.v.x v12, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmv4r.v v16, v8 +; CHECK-NEXT: fmv.x.h a0, fa0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; 
CHECK-NEXT: vmv.v.x v4, a0 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v4, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -557,15 +543,15 @@ define @vfma_vf_nxv16bf16_commute( ; CHECK-LABEL: vfma_vf_nxv16bf16_commute: ; CHECK: # %bb.0: ; CHECK-NEXT: fmv.x.h a1, fa0 -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t ; CHECK-NEXT: vmv.v.x v4, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v4 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v4, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v24, v8, v16, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -577,14 +563,14 @@ define @vfma_vf_nxv16bf16_unmasked( ; CHECK-LABEL: vfma_vf_nxv16bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: fmv.x.h a1, fa0 -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 ; CHECK-NEXT: vmv.v.x v12, a1 ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 ; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v12 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v0, v24, v16 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %b, i32 0 @@ -597,14 +583,14 @@ define @vfma_vf_nxv16bf16_unmasked_commute( poison, bfloat %b, i32 0 @@ -621,142 +607,124 @@ define @vfma_vv_nxv32bf16( %va, @vfma_vv_nxv32bf16_unmasked( ; CHECK-NEXT: slli a2, a2, 5 ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: mv a2, a0 -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re16.v v16, (a0) ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vmset.m v7 +; CHECK-NEXT: vmset.m v24 ; 
CHECK-NEXT: slli a0, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: sub a3, a1, a0 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v7, a2 +; CHECK-NEXT: vslidedown.vx v0, v24, a2 ; CHECK-NEXT: sltu a2, a1, a3 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 @@ -801,34 +769,32 @@ define @vfma_vv_nxv32bf16_unmasked( ; CHECK-NEXT: slli a3, a3, 3 ; CHECK-NEXT: add a3, sp, a3 ; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vmv4r.v v8, v16 -; CHECK-NEXT: vmv8r.v v24, v16 -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 4 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28 -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 3 -; CHECK-NEXT: mv a4, a3 -; CHECK-NEXT: slli a3, a3, 1 -; CHECK-NEXT: add a3, a3, a4 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v20, v8, v0.t ; CHECK-NEXT: bltu a1, a0, .LBB31_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a0 @@ -837,31 +803,33 @@ define @vfma_vv_nxv32bf16_unmasked( ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: 
slli a0, a0, 3 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vfmacc.vv v0, v16, v24 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v0, v24, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v16, v0 +; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add sp, sp, a0 @@ -879,140 +847,127 @@ define @vfma_vf_nxv32bf16( %va, bfl ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a2, a2, a1 +; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x29, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 41 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: fmv.x.h a2, fa0 ; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a4, a1, 5 -; CHECK-NEXT: add a1, a4, a1 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: slli a1, a3, 1 ; CHECK-NEXT: srli a3, a3, 2 ; CHECK-NEXT: sub a4, a0, a1 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 4 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs1r.v v0, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: sltu a3, a0, a4 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a3, a3, a4 ; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: mv a5, a4 +; CHECK-NEXT: slli a4, a4, 4 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 3 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a4, vlenb ; CHECK-NEXT: slli a4, a4, 3 -; CHECK-NEXT: add a5, a5, a4 +; CHECK-NEXT: mv a5, a4 ; CHECK-NEXT: slli a4, a4, 1 ; CHECK-NEXT: add a4, a4, a5 ; CHECK-NEXT: add a4, sp, a4 ; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli 
a4, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28, v0.t ; CHECK-NEXT: vsetvli a4, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv.v.x v24, a2 -; CHECK-NEXT: vmv4r.v v8, v24 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a4, a2, 4 -; CHECK-NEXT: add a2, a4, a2 +; CHECK-NEXT: slli a2, a2, 5 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 5 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v20, v8 +; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v24, v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t ; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a2, a1, 5 -; CHECK-NEXT: add a1, a2, a1 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a2, a2, a1 -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: add a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a2, a1, 5 -; CHECK-NEXT: add a1, a2, a1 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a2, a1, 4 -; CHECK-NEXT: add a1, a2, a1 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v0 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a2, a1, 5 -; CHECK-NEXT: add a1, a2, a1 -; CHECK-NEXT: add a1, sp, a1 
-; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t ; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 @@ -1032,140 +987,132 @@ define @vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a2, a2, a1 +; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x29, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 41 * vlenb -; CHECK-NEXT: fmv.x.h a2, fa0 -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a4, a1, 5 -; CHECK-NEXT: add a1, a4, a1 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: vmv.v.x v24, a1 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded 
Spill -; CHECK-NEXT: slli a1, a3, 1 -; CHECK-NEXT: srli a3, a3, 2 -; CHECK-NEXT: sub a4, a0, a1 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 4 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs1r.v v0, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v0, a3 -; CHECK-NEXT: sltu a3, a0, a4 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a3, a3, a4 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: mv a5, a4 -; CHECK-NEXT: slli a4, a4, 3 -; CHECK-NEXT: add a5, a5, a4 -; CHECK-NEXT: slli a4, a4, 1 -; CHECK-NEXT: add a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 -; CHECK-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv.v.x v24, a2 -; CHECK-NEXT: vmv4r.v v8, v24 +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: sltu a2, a0, a3 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a2, a2, a3 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 4 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20, v0.t ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a4, a2, 4 -; CHECK-NEXT: add a2, a4, a2 +; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: slli a2, a2, 5 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28, v0.t +; CHECK-NEXT: vmv4r.v v24, v8 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v24, v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB33_2: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a2, a1, 5 -; CHECK-NEXT: add a1, a2, a1 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; 
CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a2, a2, a1 -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: add a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a2, a1, 4 -; CHECK-NEXT: add a1, a2, a1 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a2, a1, 5 -; CHECK-NEXT: add a1, a2, a1 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a2, a1, 5 -; CHECK-NEXT: add a1, a2, a1 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: mv a1, a0 ; 
CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 @@ -1191,24 +1138,12 @@ define @vfma_vf_nxv32bf16_unmasked( ; CHECK-NEXT: fmv.x.h a2, fa0 ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vmset.m v7 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: mv a4, a1 -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: add a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmset.m v24 ; CHECK-NEXT: slli a1, a3, 1 ; CHECK-NEXT: srli a3, a3, 2 ; CHECK-NEXT: sub a4, a0, a1 ; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v7, a3 +; CHECK-NEXT: vslidedown.vx v0, v24, a3 ; CHECK-NEXT: sltu a3, a0, a4 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a3, a3, a4 @@ -1216,57 +1151,76 @@ define @vfma_vf_nxv32bf16_unmasked( ; CHECK-NEXT: slli a4, a4, 4 ; CHECK-NEXT: add a4, sp, a4 ; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20, v0.t +; CHECK-NEXT: addi a4, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 3 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t ; CHECK-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv.v.x v24, a2 -; CHECK-NEXT: vmv4r.v v8, v24 +; CHECK-NEXT: vmv.v.x v8, a2 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a4 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28, v0.t ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v20, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v20, v8, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB34_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB34_2: ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v24, (a1) # 
Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v0, v24, v8 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v16, v0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: csrr a0, vlenb @@ -1291,86 +1245,92 @@ define @vfma_vf_nxv32bf16_unmasked_commute( @vfma_vv_nxv1f16( %va, @llvm.vp.fma.nxv1f16( %va, %b, %c, %m, i32 %evl) ret %v @@ -1418,13 +1378,13 @@ define @vfma_vv_nxv1f16_unmasked( %va, @llvm.vp.fma.nxv1f16( %va, %b, %c, splat (i1 true), i32 %evl) @@ -1441,15 +1401,15 @@ define @vfma_vf_nxv1f16( %va, half %b, poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1467,15 +1427,15 @@ define @vfma_vf_nxv1f16_commute( %va, hal ; ZVFHMIN-LABEL: vfma_vf_nxv1f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v11, v8, v10, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1493,14 +1453,14 @@ define @vfma_vf_nxv1f16_unmasked( %va, ha ; ZVFHMIN-LABEL: vfma_vf_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, 
e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -1519,14 +1479,14 @@ define @vfma_vf_nxv1f16_unmasked_commute( ; ZVFHMIN-LABEL: vfma_vf_nxv1f16_unmasked_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -1547,14 +1507,14 @@ define @vfma_vv_nxv2f16( %va, @llvm.vp.fma.nxv2f16( %va, %b, %c, %m, i32 %evl) ret %v @@ -1569,13 +1529,13 @@ define @vfma_vv_nxv2f16_unmasked( %va, @llvm.vp.fma.nxv2f16( %va, %b, %c, splat (i1 true), i32 %evl) @@ -1592,15 +1552,15 @@ define @vfma_vf_nxv2f16( %va, half %b, poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1618,15 +1578,15 @@ define @vfma_vf_nxv2f16_commute( %va, hal ; ZVFHMIN-LABEL: vfma_vf_nxv2f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v11, v8, v10, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1644,14 +1604,14 @@ define @vfma_vf_nxv2f16_unmasked( %va, ha ; ZVFHMIN-LABEL: vfma_vf_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -1670,14 +1630,14 @@ define @vfma_vf_nxv2f16_unmasked_commute( ; ZVFHMIN-LABEL: vfma_vf_nxv2f16_unmasked_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; 
ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -1698,14 +1658,14 @@ define @vfma_vv_nxv4f16( %va, @llvm.vp.fma.nxv4f16( %va, %b, %c, %m, i32 %evl) ret %v @@ -1720,13 +1680,13 @@ define @vfma_vv_nxv4f16_unmasked( %va, @llvm.vp.fma.nxv4f16( %va, %b, %c, splat (i1 true), i32 %evl) @@ -1743,15 +1703,15 @@ define @vfma_vf_nxv4f16( %va, half %b, poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1769,15 +1729,15 @@ define @vfma_vf_nxv4f16_commute( %va, hal ; ZVFHMIN-LABEL: vfma_vf_nxv4f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v14, v10, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1795,14 +1755,14 @@ define @vfma_vf_nxv4f16_unmasked( %va, ha ; ZVFHMIN-LABEL: vfma_vf_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -1821,14 +1781,14 @@ define @vfma_vf_nxv4f16_unmasked_commute( ; ZVFHMIN-LABEL: vfma_vf_nxv4f16_unmasked_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -1849,14 +1809,14 @@ define @vfma_vv_nxv8f16( %va, @llvm.vp.fma.nxv8f16( %va, %b, %c, %m, i32 %evl) ret %v @@ -1871,13 +1831,13 @@ define @vfma_vv_nxv8f16_unmasked( %va, @llvm.vp.fma.nxv8f16( 
%va, %b, %c, splat (i1 true), i32 %evl) @@ -1894,15 +1854,15 @@ define @vfma_vf_nxv8f16( %va, half %b, poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1920,15 +1880,15 @@ define @vfma_vf_nxv8f16_commute( %va, hal ; ZVFHMIN-LABEL: vfma_vf_nxv8f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v16, v20, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1946,14 +1906,14 @@ define @vfma_vf_nxv8f16_unmasked( %va, ha ; ZVFHMIN-LABEL: vfma_vf_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -1972,14 +1932,14 @@ define @vfma_vf_nxv8f16_unmasked_commute( ; ZVFHMIN-LABEL: vfma_vf_nxv8f16_unmasked_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -2006,17 +1966,17 @@ define @vfma_vv_nxv16f16( %va, @vfma_vv_nxv16f16_unmasked( %va, ; ; ZVFHMIN-LABEL: vfma_vv_nxv16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fma.nxv16f16( %va, %b, %c, splat (i1 true), i32 %evl) @@ -2059,32 +2019,18 @@ define @vfma_vf_nxv16f16( %va, half %b, ; ; ZVFHMIN-LABEL: 
vfma_vf_nxv16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv.v.x v12, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vmv4r.v v16, v8 +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vmv8r.v v8, v24 +; ZVFHMIN-NEXT: vmv.v.x v4, a0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 -; ZVFHMIN-NEXT: addi sp, sp, 16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -2102,15 +2048,15 @@ define @vfma_vf_nxv16f16_commute( %va, ; ZVFHMIN-LABEL: vfma_vf_nxv16f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vmv.v.x v4, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -2128,14 +2074,14 @@ define @vfma_vf_nxv16f16_unmasked( %va, ; ZVFHMIN-LABEL: vfma_vf_nxv16f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -2154,14 +2100,14 @@ define 
@vfma_vf_nxv16f16_unmasked_commute( poison, half %b, i32 0 @@ -2186,142 +2132,124 @@ define @vfma_vv_nxv32f16( %va, @vfma_vv_nxv32f16_unmasked( %va, ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: sub sp, sp, a2 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; ZVFHMIN-NEXT: vl8re16.v v24, (a0) -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: mv a2, a0 -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: add a0, a0, a2 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vl8re16.v v16, (a0) ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v7 +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: slli a0, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: sub a3, a1, a0 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a2 +; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: sltu a2, a1, a3 ; ZVFHMIN-NEXT: addi a2, a2, -1 ; ZVFHMIN-NEXT: and a2, a2, a3 @@ -2373,34 +2301,32 @@ define @vfma_vv_nxv32f16_unmasked( %va, ; ZVFHMIN-NEXT: slli a3, a3, 3 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: addi a3, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv4r.v v8, v16 -; ZVFHMIN-NEXT: vmv8r.v v24, v16 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: mv a4, a3 -; ZVFHMIN-NEXT: slli a3, a3, 1 -; ZVFHMIN-NEXT: add a3, a3, a4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 -; ZVFHMIN-NEXT: addi a3, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v 
v8, v20, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t ; ZVFHMIN-NEXT: bltu a1, a0, .LBB67_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a0 @@ -2409,31 +2335,33 @@ define @vfma_vv_nxv32f16_unmasked( %va, ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: mv a1, a0 ; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: add a0, a0, a2 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8 ; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmacc.vv v0, v16, v24 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v0 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add sp, sp, a0 @@ -2457,151 +2385,138 @@ define @vfma_vf_nxv32f16( %va, half %b, ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: mv a2, a1 ; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a2, a2, a1 +; ZVFHMIN-NEXT: mv a2, a1 ; ZVFHMIN-NEXT: slli a1, a1, 2 ; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x29, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 41 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v7, v0 +; ZVFHMIN-NEXT: vmv8r.v v24, v8 ; ZVFHMIN-NEXT: fmv.x.h a2, fa0 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a4, a1, 5 -; ZVFHMIN-NEXT: add a1, a4, a1 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, 
a1, 3 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 ; ZVFHMIN-NEXT: sub a4, a0, a1 -; ZVFHMIN-NEXT: csrr a5, vlenb -; ZVFHMIN-NEXT: slli a5, a5, 4 -; ZVFHMIN-NEXT: add a5, sp, a5 -; ZVFHMIN-NEXT: addi a5, a5, 16 -; ZVFHMIN-NEXT: vs1r.v v0, (a5) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 ; ZVFHMIN-NEXT: sltu a3, a0, a4 ; ZVFHMIN-NEXT: addi a3, a3, -1 ; ZVFHMIN-NEXT: and a3, a3, a4 ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 4 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a5, a5, a4 +; ZVFHMIN-NEXT: mv a5, a4 ; ZVFHMIN-NEXT: slli a4, a4, 1 ; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a2 -; ZVFHMIN-NEXT: vmv4r.v v8, v24 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a4, a2, 4 -; ZVFHMIN-NEXT: add a2, a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB68_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB68_2: -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a2, a1, 5 -; ZVFHMIN-NEXT: add a1, a2, a1 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a2, a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: add a1, a1, a2 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a2, a1, 5 -; ZVFHMIN-NEXT: add a1, a2, a1 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a2, a1, 4 -; ZVFHMIN-NEXT: add a1, a2, a1 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a2, a1, 5 -; ZVFHMIN-NEXT: add a1, a2, a1 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t -; ZVFHMIN-NEXT: vmv.v.v v16, v8 -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: mv a1, a0 ; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a1, a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 ; ZVFHMIN-NEXT: add a0, a0, a1 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 -; ZVFHMIN-NEXT: addi sp, sp, 16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 -; ZVFHMIN-NEXT: ret - %elt.head = insertelement poison, half %b, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv32f16( %va, %vb, %vc, %m, i32 %evl) - ret %v +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; 
ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t +; ZVFHMIN-NEXT: vmv.v.v v16, v8 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 +; ZVFHMIN-NEXT: ret + %elt.head = insertelement poison, half %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.fma.nxv32f16( %va, %vb, %vc, %m, i32 %evl) + ret %v } define @vfma_vf_nxv32f16_commute( %va, half %b, %vc, %m, i32 zeroext %evl) { @@ -2616,140 +2531,132 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: mv a2, a1 ; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a2, a2, a1 +; ZVFHMIN-NEXT: mv a2, a1 ; ZVFHMIN-NEXT: slli a1, a1, 2 ; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x29, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 41 * vlenb -; ZVFHMIN-NEXT: fmv.x.h a2, fa0 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a4, a1, 5 -; ZVFHMIN-NEXT: add a1, a4, a1 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v7, v0 +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: slli a1, a3, 1 -; ZVFHMIN-NEXT: srli a3, a3, 2 -; ZVFHMIN-NEXT: sub a4, a0, a1 -; ZVFHMIN-NEXT: csrr a5, vlenb -; ZVFHMIN-NEXT: slli a5, a5, 4 -; ZVFHMIN-NEXT: add a5, sp, a5 -; ZVFHMIN-NEXT: addi a5, a5, 16 -; ZVFHMIN-NEXT: vs1r.v v0, (a5) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 -; ZVFHMIN-NEXT: sltu a3, a0, a4 -; ZVFHMIN-NEXT: addi a3, a3, -1 -; ZVFHMIN-NEXT: and a3, a3, a4 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a5, a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, 
ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v24, a2 -; ZVFHMIN-NEXT: vmv4r.v v8, v24 +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 +; ZVFHMIN-NEXT: sltu a2, a0, a3 +; ZVFHMIN-NEXT: addi a2, a2, -1 +; ZVFHMIN-NEXT: and a2, a2, a3 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: slli a3, a3, 4 +; ZVFHMIN-NEXT: add a3, sp, a3 +; ZVFHMIN-NEXT: addi a3, a3, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a4, a2, 4 -; ZVFHMIN-NEXT: add a2, a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: vmv4r.v v24, v8 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB69_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB69_2: -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a2, a1, 5 -; ZVFHMIN-NEXT: add a1, a2, a1 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a2, a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: add a1, a1, a2 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a2, a1, 4 -; ZVFHMIN-NEXT: add a1, a2, a1 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a1) # 
Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a2, a1, 5 -; ZVFHMIN-NEXT: add a1, a2, a1 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a2, a1, 5 -; ZVFHMIN-NEXT: add a1, a2, a1 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t ; ZVFHMIN-NEXT: vmv.v.v v16, v8 ; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: mv a1, a0 ; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a1, a1, a0 +; ZVFHMIN-NEXT: mv a1, a0 ; ZVFHMIN-NEXT: slli a0, a0, 2 ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 @@ -2781,24 +2688,12 @@ define @vfma_vf_nxv32f16_unmasked( %va, ; ZVFHMIN-NEXT: fmv.x.h a2, fa0 ; ZVFHMIN-NEXT: csrr a3, vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v7 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: mv a4, a1 -; ZVFHMIN-NEXT: slli a1, 
a1, 1 -; ZVFHMIN-NEXT: add a1, a1, a4 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 ; ZVFHMIN-NEXT: sub a4, a0, a1 ; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3 +; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a3 ; ZVFHMIN-NEXT: sltu a3, a0, a4 ; ZVFHMIN-NEXT: addi a3, a3, -1 ; ZVFHMIN-NEXT: and a3, a3, a4 @@ -2806,57 +2701,76 @@ define @vfma_vf_nxv32f16_unmasked( %va, ; ZVFHMIN-NEXT: slli a4, a4, 4 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: addi a4, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v24, a2 -; ZVFHMIN-NEXT: vmv4r.v v8, v24 +; ZVFHMIN-NEXT: vmv.v.x v8, a2 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB70_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB70_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: add a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded 
Reload +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v8 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v0 ; ZVFHMIN-NEXT: vmv8r.v v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb @@ -2887,86 +2801,92 @@ define @vfma_vf_nxv32f16_unmasked_commute( @vfmsub_vv_nxv1f16( %va, @llvm.vp.fneg.nxv1f16( %c, %m, i32 %evl) %v = call @llvm.vp.fma.nxv1f16( %va, %b, %negc, %m, i32 %evl) @@ -3905,16 +3823,14 @@ define @vfmsub_vv_nxv1f16_unmasked( %va, ; ZVFHMIN-LABEL: vfmsub_vv_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vxor.vx v8, v10, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 true), i32 %evl) @@ -3932,19 +3848,17 @@ define @vfmsub_vf_nxv1f16( %va, half %b, ; ZVFHMIN-LABEL: vfmsub_vf_nxv1f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v9, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; 
ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -3963,19 +3877,17 @@ define @vfmsub_vf_nxv1f16_commute( %va, h ; ZVFHMIN-LABEL: vfmsub_vf_nxv1f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v9, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v11, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11 +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v11, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -3994,18 +3906,16 @@ define @vfmsub_vf_nxv1f16_unmasked( %va, ; ZVFHMIN-LABEL: vfmsub_vf_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v9, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v9, a0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v9 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -4025,18 +3935,16 @@ define @vfmsub_vf_nxv1f16_unmasked_commute( poison, half %b, i32 0 @@ -4060,14 +3968,13 @@ define @vfnmadd_vv_nxv1f16( %va, @llvm.vp.fneg.nxv1f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv1f16( %c, %m, i32 %evl) @@ -4088,14 +3995,13 @@ define @vfnmadd_vv_nxv1f16_commuted( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v9, v10, v11, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %negb = call 
@llvm.vp.fneg.nxv1f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv1f16( %c, %m, i32 %evl) @@ -4116,13 +4022,12 @@ define @vfnmadd_vv_nxv1f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v9, v9, a1 ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v9, v10, v11 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %negb = call @llvm.vp.fneg.nxv1f16( %b, splat (i1 true), i32 %evl) @@ -4144,13 +4049,12 @@ define @vfnmadd_vv_nxv1f16_unmasked_commuted( @llvm.vp.fneg.nxv1f16( %b, splat (i1 true), i32 %evl) @@ -4169,20 +4073,18 @@ define @vfnmadd_vf_nxv1f16( %va, half %b, ; ZVFHMIN-LABEL: vfnmadd_vf_nxv1f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -4202,20 +4104,18 @@ define @vfnmadd_vf_nxv1f16_commute( %va, ; ZVFHMIN-LABEL: vfnmadd_vf_nxv1f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v11, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, 
zeroinitializer @@ -4235,19 +4135,17 @@ define @vfnmadd_vf_nxv1f16_unmasked( %va, ; ZVFHMIN-LABEL: vfnmadd_vf_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-NEXT: vxor.vx v9, v9, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 +; ZVFHMIN-NEXT: vxor.vx v9, v9, a0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -4268,19 +4166,17 @@ define @vfnmadd_vf_nxv1f16_unmasked_commute( poison, half %b, i32 0 @@ -4303,17 +4199,16 @@ define @vfnmadd_vf_nxv1f16_neg_splat( %va ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t -; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v9, v10, v11, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -4335,17 +4230,16 @@ define @vfnmadd_vf_nxv1f16_neg_splat_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -4367,16 +4261,15 @@ define @vfnmadd_vf_nxv1f16_neg_splat_unmasked( poison, half %b, i32 0 @@ -4399,16 +4292,15 @@ define @vfnmadd_vf_nxv1f16_neg_splat_unmasked_commute( poison, half %b, i32 0 @@ -4433,14 +4325,13 @@ define @vfnmsub_vv_nxv1f16( %va, @llvm.vp.fneg.nxv1f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv1f16( %c, %m, i32 %evl) @@ -4461,14 +4352,13 @@ define @vfnmsub_vv_nxv1f16_commuted( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v9, v10, v11, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w 
v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %negb = call @llvm.vp.fneg.nxv1f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv1f16( %c, %m, i32 %evl) @@ -4489,13 +4379,12 @@ define @vfnmsub_vv_nxv1f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v9, v9, a1 ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v9, v10, v11 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %negb = call @llvm.vp.fneg.nxv1f16( %b, splat (i1 true), i32 %evl) @@ -4517,13 +4406,12 @@ define @vfnmsub_vv_nxv1f16_unmasked_commuted( @llvm.vp.fneg.nxv1f16( %b, splat (i1 true), i32 %evl) @@ -4542,19 +4430,17 @@ define @vfnmsub_vf_nxv1f16( %va, half %b, ; ZVFHMIN-LABEL: vfnmsub_vf_nxv1f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v9, v11, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -4573,19 +4459,17 @@ define @vfnmsub_vf_nxv1f16_commute( %va, ; ZVFHMIN-LABEL: vfnmsub_vf_nxv1f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v11, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v11, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, 
half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -4604,18 +4488,16 @@ define @vfnmsub_vf_nxv1f16_unmasked( %va, ; ZVFHMIN-LABEL: vfnmsub_vf_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -4635,18 +4517,16 @@ define @vfnmsub_vf_nxv1f16_unmasked_commute( poison, half %b, i32 0 @@ -4668,18 +4548,15 @@ define @vfnmsub_vf_nxv1f16_neg_splat( %va ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v9, v10, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v10, v9, v11, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v11, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -4700,18 +4577,15 @@ define @vfnmsub_vf_nxv1f16_neg_splat_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -4732,17 +4606,14 @@ define @vfnmsub_vf_nxv1f16_neg_splat_unmasked( poison, half %b, i32 0 @@ -4764,17 +4635,14 @@ define @vfnmsub_vf_nxv1f16_neg_splat_unmasked_commute( poison, half %b, i32 0 @@ -4797,17 +4665,15 @@ define @vfmsub_vv_nxv2f16( %va, @llvm.vp.fneg.nxv2f16( %c, %m, i32 %evl) %v = call @llvm.vp.fma.nxv2f16( %va, %b, %negc, %m, i32 %evl) @@ -4824,16 +4690,14 @@ define @vfmsub_vv_nxv2f16_unmasked( %va, ; ZVFHMIN-LABEL: vfmsub_vv_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vxor.vx v8, v10, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v10 -; ZVFHMIN-NEXT: 
vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 true), i32 %evl) @@ -4851,19 +4715,17 @@ define @vfmsub_vf_nxv2f16( %va, half %b, ; ZVFHMIN-LABEL: vfmsub_vf_nxv2f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v9, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -4882,19 +4744,17 @@ define @vfmsub_vf_nxv2f16_commute( %va, h ; ZVFHMIN-LABEL: vfmsub_vf_nxv2f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v9, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v11, v8, v9, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11 +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v11, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -4913,18 +4773,16 @@ define @vfmsub_vf_nxv2f16_unmasked( %va, ; ZVFHMIN-LABEL: vfmsub_vf_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v9, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v9, a0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v9 -; ZVFHMIN-NEXT: 
vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -4944,18 +4802,16 @@ define @vfmsub_vf_nxv2f16_unmasked_commute( poison, half %b, i32 0 @@ -4979,14 +4835,13 @@ define @vfnmadd_vv_nxv2f16( %va, @llvm.vp.fneg.nxv2f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv2f16( %c, %m, i32 %evl) @@ -5007,14 +4862,13 @@ define @vfnmadd_vv_nxv2f16_commuted( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v9, v10, v11, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %negb = call @llvm.vp.fneg.nxv2f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv2f16( %c, %m, i32 %evl) @@ -5035,13 +4889,12 @@ define @vfnmadd_vv_nxv2f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v9, v9, a1 ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v9, v10, v11 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %negb = call @llvm.vp.fneg.nxv2f16( %b, splat (i1 true), i32 %evl) @@ -5063,13 +4916,12 @@ define @vfnmadd_vv_nxv2f16_unmasked_commuted( @llvm.vp.fneg.nxv2f16( %b, splat (i1 true), i32 %evl) @@ -5088,20 +4940,18 @@ define @vfnmadd_vf_nxv2f16( %va, half %b, ; ZVFHMIN-LABEL: vfnmadd_vf_nxv2f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer 
@@ -5121,20 +4971,18 @@ define @vfnmadd_vf_nxv2f16_commute( %va, ; ZVFHMIN-LABEL: vfnmadd_vf_nxv2f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v11, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -5154,19 +5002,17 @@ define @vfnmadd_vf_nxv2f16_unmasked( %va, ; ZVFHMIN-LABEL: vfnmadd_vf_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-NEXT: vxor.vx v9, v9, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 +; ZVFHMIN-NEXT: vxor.vx v9, v9, a0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -5187,19 +5033,17 @@ define @vfnmadd_vf_nxv2f16_unmasked_commute( poison, half %b, i32 0 @@ -5222,17 +5066,16 @@ define @vfnmadd_vf_nxv2f16_neg_splat( %va ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t -; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v9, v10, v11, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 
0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -5254,17 +5097,16 @@ define @vfnmadd_vf_nxv2f16_neg_splat_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -5286,16 +5128,15 @@ define @vfnmadd_vf_nxv2f16_neg_splat_unmasked( poison, half %b, i32 0 @@ -5318,16 +5159,15 @@ define @vfnmadd_vf_nxv2f16_neg_splat_unmasked_commute( poison, half %b, i32 0 @@ -5352,14 +5192,13 @@ define @vfnmsub_vv_nxv2f16( %va, @llvm.vp.fneg.nxv2f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv2f16( %c, %m, i32 %evl) @@ -5380,14 +5219,13 @@ define @vfnmsub_vv_nxv2f16_commuted( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v9, v10, v11, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %negb = call @llvm.vp.fneg.nxv2f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv2f16( %c, %m, i32 %evl) @@ -5408,13 +5246,12 @@ define @vfnmsub_vv_nxv2f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v9, v9, a1 ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v9, v10, v11 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %negb = call @llvm.vp.fneg.nxv2f16( %b, splat (i1 true), i32 %evl) @@ -5436,13 +5273,12 @@ define @vfnmsub_vv_nxv2f16_unmasked_commuted( @llvm.vp.fneg.nxv2f16( %b, splat (i1 true), i32 %evl) @@ -5461,19 +5297,17 @@ define @vfnmsub_vf_nxv2f16( %va, half %b, ; ZVFHMIN-LABEL: vfnmsub_vf_nxv2f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v9, v11, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement 
poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -5492,19 +5326,17 @@ define @vfnmsub_vf_nxv2f16_commute( %va, ; ZVFHMIN-LABEL: vfnmsub_vf_nxv2f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v11, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v11, v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -5523,18 +5355,16 @@ define @vfnmsub_vf_nxv2f16_unmasked( %va, ; ZVFHMIN-LABEL: vfnmsub_vf_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -5554,18 +5384,16 @@ define @vfnmsub_vf_nxv2f16_unmasked_commute( poison, half %b, i32 0 @@ -5587,18 +5415,15 @@ define @vfnmsub_vf_nxv2f16_neg_splat( %va ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v9, v10, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v10, v9, v11, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v11, v9, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11, v0.t ; 
ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -5619,18 +5444,15 @@ define @vfnmsub_vf_nxv2f16_neg_splat_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -5651,17 +5473,14 @@ define @vfnmsub_vf_nxv2f16_neg_splat_unmasked( poison, half %b, i32 0 @@ -5683,17 +5502,14 @@ define @vfnmsub_vf_nxv2f16_neg_splat_unmasked_commute( poison, half %b, i32 0 @@ -5716,17 +5532,15 @@ define @vfmsub_vv_nxv4f16( %va, @llvm.vp.fneg.nxv4f16( %c, %m, i32 %evl) %v = call @llvm.vp.fma.nxv4f16( %va, %b, %negc, %m, i32 %evl) @@ -5743,16 +5557,14 @@ define @vfmsub_vv_nxv4f16_unmasked( %va, ; ZVFHMIN-LABEL: vfmsub_vv_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vxor.vx v8, v10, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 ; ZVFHMIN-NEXT: ret %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 true), i32 %evl) @@ -5770,19 +5582,17 @@ define @vfmsub_vf_nxv4f16( %va, half %b, ; ZVFHMIN-LABEL: vfmsub_vf_nxv4f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v9, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v12, v14, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -5801,19 +5611,17 @@ define @vfmsub_vf_nxv4f16_commute( %va, h ; ZVFHMIN-LABEL: vfmsub_vf_nxv4f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v9, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v12, v8, v14, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: 
vxor.vx v9, v9, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v14, v8, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -5832,18 +5640,16 @@ define @vfmsub_vf_nxv4f16_unmasked( %va, ; ZVFHMIN-LABEL: vfmsub_vf_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v9, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v9, a0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v16, v12, v14 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -5863,18 +5669,16 @@ define @vfmsub_vf_nxv4f16_unmasked_commute( poison, half %b, i32 0 @@ -5898,14 +5702,13 @@ define @vfnmadd_vv_nxv4f16( %va, @llvm.vp.fneg.nxv4f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv4f16( %c, %m, i32 %evl) @@ -5926,14 +5729,13 @@ define @vfnmadd_vv_nxv4f16_commuted( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t ; ZVFHMIN-NEXT: ret %negb = call @llvm.vp.fneg.nxv4f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv4f16( %c, %m, i32 %evl) @@ -5954,13 +5756,12 @@ define @vfnmadd_vv_nxv4f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vxor.vx v9, v9, a1 ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 ; ZVFHMIN-NEXT: ret %negb = call @llvm.vp.fneg.nxv4f16( %b, splat (i1 true), i32 %evl) @@ -5982,13 +5783,12 @@ define @vfnmadd_vv_nxv4f16_unmasked_commuted( @llvm.vp.fneg.nxv4f16( %b, splat (i1 true), i32 %evl) @@ -6007,20 +5807,18 @@ define 
@vfnmadd_vf_nxv4f16( %va, half %b, ; ZVFHMIN-LABEL: vfnmadd_vf_nxv4f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -6040,20 +5838,18 @@ define @vfnmadd_vf_nxv4f16_commute( %va, ; ZVFHMIN-LABEL: vfnmadd_vf_nxv4f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v14, v8, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -6073,19 +5869,17 @@ define @vfnmadd_vf_nxv4f16_unmasked( %va, ; ZVFHMIN-LABEL: vfnmadd_vf_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-NEXT: vxor.vx v9, v9, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 +; ZVFHMIN-NEXT: vxor.vx v9, v9, a0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, 
v16 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -6106,19 +5900,17 @@ define @vfnmadd_vf_nxv4f16_unmasked_commute( poison, half %b, i32 0 @@ -6141,17 +5933,16 @@ define @vfnmadd_vf_nxv4f16_neg_splat( %va ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t -; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -6173,17 +5964,16 @@ define @vfnmadd_vf_nxv4f16_neg_splat_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -6205,16 +5995,15 @@ define @vfnmadd_vf_nxv4f16_neg_splat_unmasked( poison, half %b, i32 0 @@ -6237,16 +6026,15 @@ define @vfnmadd_vf_nxv4f16_neg_splat_unmasked_commute( poison, half %b, i32 0 @@ -6271,14 +6059,13 @@ define @vfnmsub_vv_nxv4f16( %va, @llvm.vp.fneg.nxv4f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv4f16( %c, %m, i32 %evl) @@ -6299,14 +6086,13 @@ define @vfnmsub_vv_nxv4f16_commuted( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t ; ZVFHMIN-NEXT: ret %negb = call @llvm.vp.fneg.nxv4f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv4f16( %c, %m, i32 %evl) @@ -6327,13 +6113,12 @@ define @vfnmsub_vv_nxv4f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vxor.vx v9, v9, a1 ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 ; ZVFHMIN-NEXT: ret %negb = call @llvm.vp.fneg.nxv4f16( %b, splat (i1 true), i32 %evl) @@ -6355,13 +6140,12 @@ define 
@vfnmsub_vv_nxv4f16_unmasked_commuted( @llvm.vp.fneg.nxv4f16( %b, splat (i1 true), i32 %evl) @@ -6380,19 +6164,17 @@ define @vfnmsub_vf_nxv4f16( %va, half %b, ; ZVFHMIN-LABEL: vfnmsub_vf_nxv4f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v12, v14, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -6411,19 +6193,17 @@ define @vfnmsub_vf_nxv4f16_commute( %va, ; ZVFHMIN-LABEL: vfnmsub_vf_nxv4f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v14, v8, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 +; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v12, v8, v14, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -6442,18 +6222,16 @@ define @vfnmsub_vf_nxv4f16_unmasked( %va, ; ZVFHMIN-LABEL: vfnmsub_vf_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; 
ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -6473,18 +6251,16 @@ define @vfnmsub_vf_nxv4f16_unmasked_commute( poison, half %b, i32 0 @@ -6506,18 +6282,15 @@ define @vfnmsub_vf_nxv4f16_neg_splat( %va ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vxor.vx v9, v10, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v10, v14, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v12, v14, v10, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -6538,18 +6311,15 @@ define @vfnmsub_vf_nxv4f16_neg_splat_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -6570,17 +6340,14 @@ define @vfnmsub_vf_nxv4f16_neg_splat_unmasked( poison, half %b, i32 0 @@ -6602,17 +6369,14 @@ define @vfnmsub_vf_nxv4f16_neg_splat_unmasked_commute( poison, half %b, i32 0 @@ -6635,17 +6399,15 @@ define @vfmsub_vv_nxv8f16( %va, @llvm.vp.fneg.nxv8f16( %c, %m, i32 %evl) %v = call @llvm.vp.fma.nxv8f16( %va, %b, %negc, %m, i32 %evl) @@ -6662,16 +6424,14 @@ define @vfmsub_vv_nxv8f16_unmasked( %va, ; ZVFHMIN-LABEL: vfmsub_vv_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vxor.vx v8, v12, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20 ; ZVFHMIN-NEXT: ret %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 true), i32 %evl) @@ -6689,19 +6449,17 @@ define @vfmsub_vf_nxv8f16( %va, half %b, ; ZVFHMIN-LABEL: vfmsub_vf_nxv8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v12, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v10, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v20, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 +; ZVFHMIN-NEXT: vmv.v.x v12, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx 
v10, v10, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -6720,19 +6478,17 @@ define @vfmsub_vf_nxv8f16_commute( %va, h ; ZVFHMIN-LABEL: vfmsub_vf_nxv8f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v12, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v10, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v20, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vmv.v.x v12, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v20, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -6751,18 +6507,16 @@ define @vfmsub_vf_nxv8f16_unmasked( %va, ; ZVFHMIN-LABEL: vfmsub_vf_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v12, a1 -; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v10, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v10, a0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v20 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -6782,18 +6536,16 @@ define @vfmsub_vf_nxv8f16_unmasked_commute( poison, half %b, i32 0 @@ -6817,14 +6569,13 @@ define @vfnmadd_vv_nxv8f16( %va, @llvm.vp.fneg.nxv8f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv8f16( %c, %m, i32 %evl) @@ -6845,14 +6596,13 @@ define @vfnmadd_vv_nxv8f16_commuted( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t ; ZVFHMIN-NEXT: vxor.vx v12, v12, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t +; ZVFHMIN-NEXT: 
vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t ; ZVFHMIN-NEXT: ret %negb = call @llvm.vp.fneg.nxv8f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv8f16( %c, %m, i32 %evl) @@ -6873,13 +6623,12 @@ define @vfnmadd_vv_nxv8f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1 ; ZVFHMIN-NEXT: vxor.vx v12, v12, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20 ; ZVFHMIN-NEXT: ret %negb = call @llvm.vp.fneg.nxv8f16( %b, splat (i1 true), i32 %evl) @@ -6901,13 +6650,12 @@ define @vfnmadd_vv_nxv8f16_unmasked_commuted( @llvm.vp.fneg.nxv8f16( %b, splat (i1 true), i32 %evl) @@ -6926,20 +6674,18 @@ define @vfnmadd_vf_nxv8f16( %va, half %b, ; ZVFHMIN-LABEL: vfnmadd_vf_nxv8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v12, a1 -; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v12, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -6959,20 +6705,18 @@ define @vfnmadd_vf_nxv8f16_commute( %va, ; ZVFHMIN-LABEL: vfnmadd_vf_nxv8f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v12, a1 -; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v12, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v20, v8, v16, 
v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -6992,19 +6736,17 @@ define @vfnmadd_vf_nxv8f16_unmasked( %va, ; ZVFHMIN-LABEL: vfnmadd_vf_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v12, a1 -; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-NEXT: vxor.vx v10, v10, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v12, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 +; ZVFHMIN-NEXT: vxor.vx v10, v10, a0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -7025,19 +6767,17 @@ define @vfnmadd_vf_nxv8f16_unmasked_commute( poison, half %b, i32 0 @@ -7060,17 +6800,16 @@ define @vfnmadd_vf_nxv8f16_neg_splat( %va ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v12, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vxor.vx v12, v12, a1, v0.t -; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v12, v12, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -7092,17 +6831,16 @@ define @vfnmadd_vf_nxv8f16_neg_splat_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -7124,16 +6862,15 @@ define @vfnmadd_vf_nxv8f16_neg_splat_unmasked( poison, half %b, i32 0 @@ -7156,16 +6893,15 @@ define @vfnmadd_vf_nxv8f16_neg_splat_unmasked_commute( poison, half %b, i32 0 @@ -7190,14 +6926,13 @@ define @vfnmsub_vv_nxv8f16( %va, @llvm.vp.fneg.nxv8f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv8f16( %c, %m, i32 %evl) @@ -7218,14 +6953,13 @@ define @vfnmsub_vv_nxv8f16_commuted( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t ; ZVFHMIN-NEXT: vxor.vx v12, v12, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t ; ZVFHMIN-NEXT: ret %negb = call @llvm.vp.fneg.nxv8f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv8f16( %c, %m, i32 %evl) @@ -7246,13 +6980,12 @@ define @vfnmsub_vv_nxv8f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1 ; ZVFHMIN-NEXT: vxor.vx v12, v12, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20 ; ZVFHMIN-NEXT: ret %negb = call @llvm.vp.fneg.nxv8f16( %b, splat (i1 true), i32 %evl) @@ -7274,13 +7007,12 @@ define @vfnmsub_vv_nxv8f16_unmasked_commuted( @llvm.vp.fneg.nxv8f16( %b, splat (i1 true), i32 %evl) @@ -7299,19 +7031,17 @@ define @vfnmsub_vf_nxv8f16( %va, half %b, ; ZVFHMIN-LABEL: vfnmsub_vf_nxv8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v12, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 +; ZVFHMIN-NEXT: vmv.v.x v12, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v20, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -7330,19 +7060,17 @@ define @vfnmsub_vf_nxv8f16_commute( %va, ; ZVFHMIN-LABEL: vfnmsub_vf_nxv8f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v12, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v20, v8, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20 +; ZVFHMIN-NEXT: vmv.v.x v12, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v20, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v20, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -7361,18 +7089,16 @@ define @vfnmsub_vf_nxv8f16_unmasked( %va, ; ZVFHMIN-LABEL: vfnmsub_vf_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v12, a1 -; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -7392,18 +7118,16 @@ define @vfnmsub_vf_nxv8f16_unmasked_commute( poison, half %b, i32 0 @@ -7425,18 +7149,15 @@ define @vfnmsub_vf_nxv8f16_neg_splat( %va ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v12, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vxor.vx v10, v12, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v12, v20, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v12, v12, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v20, v12, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -7457,18 +7178,15 @@ define @vfnmsub_vf_nxv8f16_neg_splat_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -7489,17 +7207,14 @@ define @vfnmsub_vf_nxv8f16_neg_splat_unmasked( poison, half %b, i32 0 @@ -7521,17 +7236,14 @@ define @vfnmsub_vf_nxv8f16_neg_splat_unmasked_commute( poison, half %b, i32 0 @@ -7553,34 +7265,18 @@ define @vfmsub_vv_nxv16f16( %va, @llvm.vp.fneg.nxv16f16( %c, %m, i32 %evl) %v = call @llvm.vp.fma.nxv16f16( %va, %b, %negc, %m, i32 %evl) @@ -7597,16 +7293,14 @@ define @vfmsub_vv_nxv16f16_unmasked( %v ; ZVFHMIN-LABEL: vfmsub_vv_nxv16f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; 
ZVFHMIN-NEXT: vxor.vx v8, v16, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0 ; ZVFHMIN-NEXT: ret %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 true), i32 %evl) @@ -7623,36 +7317,20 @@ define @vfmsub_vf_nxv16f16( %va, half % ; ; ZVFHMIN-LABEL: vfmsub_vf_nxv16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v4, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v12, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 -; ZVFHMIN-NEXT: addi sp, sp, 16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 +; ZVFHMIN-NEXT: vmv4r.v v16, v8 +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vmv.v.x v4, a0 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v12, v12, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vmv8r.v v8, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -7671,19 +7349,17 @@ define @vfmsub_vf_nxv16f16_commute( %va ; ZVFHMIN-LABEL: vfmsub_vf_nxv16f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v4, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v12, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vmv.v.x v4, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v12, v12, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t +; 
ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -7708,22 +7384,20 @@ define @vfmsub_vf_nxv16f16_unmasked( %v ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v16, a1 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v12, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v12, a0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v0 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 2 @@ -7755,22 +7429,20 @@ define @vfmsub_vf_nxv16f16_unmasked_commute( @vfnmadd_vv_nxv16f16( %va, @llvm.vp.fneg.nxv16f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv16f16( %c, %m, i32 %evl) @@ -7834,18 +7505,17 @@ define @vfnmadd_vv_nxv16f16_commuted( % ; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v12, v12, a1, v0.t -; ZVFHMIN-NEXT: vxor.vx v16, v16, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v24, v16, a1, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add sp, sp, a0 @@ -7872,13 +7542,12 @@ define @vfnmadd_vv_nxv16f16_unmasked( % ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v12, v12, a1 ; ZVFHMIN-NEXT: vxor.vx v16, v16, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0 ; ZVFHMIN-NEXT: ret %negb = call @llvm.vp.fneg.nxv16f16( %b, splat (i1 true), i32 %evl) @@ -7900,13 +7569,12 @@ define @vfnmadd_vv_nxv16f16_unmasked_commuted( @llvm.vp.fneg.nxv16f16( %b, splat (i1 true), i32 %evl) @@ -7924,36 +7592,19 @@ define @vfnmadd_vf_nxv16f16( %va, half ; ; ZVFHMIN-LABEL: vfnmadd_vf_nxv16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v4, a1 -; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vxor.vx v12, v12, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v4, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v20, v8, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v24, v12, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 -; ZVFHMIN-NEXT: addi sp, sp, 16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -7973,20 +7624,18 @@ define @vfnmadd_vf_nxv16f16_commute( %v ; ZVFHMIN-LABEL: vfnmadd_vf_nxv16f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v4, a1 -; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vxor.vx v12, v12, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vmv.v.x v4, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v28, v8, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v16, v12, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, 
v28, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -8012,23 +7661,21 @@ define @vfnmadd_vf_nxv16f16_unmasked( % ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v16, a1 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-NEXT: vxor.vx v12, v12, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v16, a1 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 +; ZVFHMIN-NEXT: vxor.vx v12, v12, a0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v24 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 2 @@ -8061,23 +7708,21 @@ define @vfnmadd_vf_nxv16f16_unmasked_commute( @vfnmadd_vf_nxv16f16_neg_splat( ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v16, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vxor.vx v4, v16, a1, v0.t -; ZVFHMIN-NEXT: vxor.vx v12, v12, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v4, v16, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v12, v12, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 2 ; ZVFHMIN-NEXT: add sp, sp, a0 @@ -8151,21 +7795,36 @@ define @vfnmadd_vf_nxv16f16_neg_splat_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -8187,16 +7846,15 @@ define @vfnmadd_vf_nxv16f16_neg_splat_unmasked( poison, half %b, i32 0 @@ 
-8219,16 +7877,15 @@ define @vfnmadd_vf_nxv16f16_neg_splat_unmasked_commute( poison, half %b, i32 0 @@ -8251,17 +7908,16 @@ define @vfnmsub_vv_nxv16f16( %va, @llvm.vp.fneg.nxv16f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv16f16( %c, %m, i32 %evl) @@ -8287,18 +7943,17 @@ define @vfnmsub_vv_nxv16f16_commuted( % ; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v12, v12, a1, v0.t -; ZVFHMIN-NEXT: vxor.vx v16, v16, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v24, v16, a1, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add sp, sp, a0 @@ -8325,13 +7980,12 @@ define @vfnmsub_vv_nxv16f16_unmasked( % ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v12, v12, a1 ; ZVFHMIN-NEXT: vxor.vx v16, v16, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0 ; ZVFHMIN-NEXT: ret %negb = call @llvm.vp.fneg.nxv16f16( %b, splat (i1 true), i32 %evl) @@ -8353,13 +8007,12 @@ define @vfnmsub_vv_nxv16f16_unmasked_commuted( @llvm.vp.fneg.nxv16f16( %b, splat (i1 true), i32 %evl) @@ -8377,36 +8030,20 @@ define @vfnmsub_vf_nxv16f16( %va, half ; ; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v4, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t -; ZVFHMIN-NEXT: 
vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 -; ZVFHMIN-NEXT: addi sp, sp, 16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 +; ZVFHMIN-NEXT: vmv4r.v v16, v12 +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vmv.v.x v4, a0 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: vmv8r.v v8, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -8425,19 +8062,17 @@ define @vfnmsub_vf_nxv16f16_commute( %v ; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16_commute: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v4, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v4, a1 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -8462,22 +8097,20 @@ define @vfnmsub_vf_nxv16f16_unmasked( % ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v16, a1 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: lui a0, 8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v24 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 
; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 2 @@ -8509,22 +8142,20 @@ define @vfnmsub_vf_nxv16f16_unmasked_commute( @vfnmsub_vf_nxv16f16_neg_splat( ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv4r.v v4, v8 -; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vmv.v.x v16, a1 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vxor.vx v12, v16, a1, v0.t -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: vmv.v.x v16, a0 +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vxor.vx v24, v16, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -8582,37 +8210,20 @@ define @vfnmsub_vf_nxv16f16_neg_splat_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -8633,17 +8244,14 @@ define @vfnmsub_vf_nxv16f16_neg_splat_unmasked( poison, half %b, i32 0 @@ -8665,17 +8273,14 @@ define @vfnmsub_vf_nxv16f16_neg_splat_unmasked_commute( poison, half %b, i32 0 @@ -8701,105 +8306,103 @@ define @vfmsub_vv_nxv32f16( %va, @vfmsub_vv_nxv32f16( %va, @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: sub sp, sp, a2 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: slli a2, a2, 2 +; ZVFHMIN-NEXT: slli a2, a2, 1 ; ZVFHMIN-NEXT: add a2, a2, a3 -; ZVFHMIN-NEXT: sub sp, sp, a2 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vl8re16.v v24, (a0) +; ZVFHMIN-NEXT: vsetvli a2, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmv8r.v v24, v8 +; ZVFHMIN-NEXT: vl8re16.v v8, (a0) ; ZVFHMIN-NEXT: lui a2, 8 -; ZVFHMIN-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v7 +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a3, vlenb ; ZVFHMIN-NEXT: slli a0, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 ; ZVFHMIN-NEXT: sub a4, a1, a0 ; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v20, v7, a3 +; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a3 ; ZVFHMIN-NEXT: sltu a3, a1, a4 ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v0, v24, a2 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vxor.vx v16, v8, a2 ; ZVFHMIN-NEXT: addi a3, a3, -1 ; 
ZVFHMIN-NEXT: and a3, a3, a4 +; ZVFHMIN-NEXT: vmv4r.v v8, v16 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: mv a4, a2 -; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 5 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv1r.v v0, v20 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: bltu a1, a0, .LBB281_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a0 ; ZVFHMIN-NEXT: .LBB281_2: +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: mv a2, a0 -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: add a0, a0, a2 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 +; ZVFHMIN-NEXT: 
vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v0 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 -; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: slli a0, a0, 1 ; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 ; ZVFHMIN-NEXT: addi sp, sp, 16 @@ -8991,50 +8620,33 @@ define @vfmsub_vf_nxv32f16( %va, half % ; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; ZVFHMIN-NEXT: fmv.x.h a2, fa0 -; ZVFHMIN-NEXT: lui a3, 8 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v24, a2 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 5 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v24, v16, a3, v0.t -; ZVFHMIN-NEXT: slli a2, a1, 1 -; ZVFHMIN-NEXT: mv a3, a0 -; ZVFHMIN-NEXT: vmv4r.v v4, v28 -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a0, a2, .LBB282_2 -; ZVFHMIN-NEXT: # %bb.1: -; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB282_2: -; ZVFHMIN-NEXT: vmv8r.v v16, v8 -; ZVFHMIN-NEXT: addi a4, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 +; ZVFHMIN-NEXT: vmv1r.v v7, v0 +; ZVFHMIN-NEXT: vmv8r.v v24, v8 +; ZVFHMIN-NEXT: fmv.x.h a2, fa0 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: vxor.vx v8, v16, a1, v0.t +; ZVFHMIN-NEXT: slli a1, a3, 1 +; ZVFHMIN-NEXT: srli a3, a3, 2 +; ZVFHMIN-NEXT: sub a4, a0, a1 +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 +; ZVFHMIN-NEXT: sltu a3, a0, a4 +; ZVFHMIN-NEXT: addi a3, a3, -1 +; ZVFHMIN-NEXT: and a3, a3, a4 ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 4 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a4) 
# Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 5 +; ZVFHMIN-NEXT: slli a4, a4, 3 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 +; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 ; ZVFHMIN-NEXT: mv a5, a4 @@ -9042,74 +8654,88 @@ define @vfmsub_vf_nxv32f16( %va, half % ; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: mv a4, a3 -; ZVFHMIN-NEXT: slli a3, a3, 1 -; ZVFHMIN-NEXT: add a3, a3, a4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: sub a2, a0, a2 -; ZVFHMIN-NEXT: srli a1, a1, 2 -; ZVFHMIN-NEXT: addi a3, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 5 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: sltu a0, a0, a2 -; ZVFHMIN-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 -; ZVFHMIN-NEXT: addi a0, a0, -1 -; ZVFHMIN-NEXT: and a0, a0, a2 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: add a1, a1, a2 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 -; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t +; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v24, a2 +; ZVFHMIN-NEXT: csrr a2, 
vlenb +; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: bltu a0, a1, .LBB282_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB282_2: +; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t +; ZVFHMIN-NEXT: vmv.v.v v16, v8 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 ; ZVFHMIN-NEXT: slli a0, a0, 2 @@ -9138,115 +8764,129 @@ define @vfmsub_vf_nxv32f16_commute( %va ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 5 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 
0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; ZVFHMIN-NEXT: fmv.x.h a2, fa0 -; ZVFHMIN-NEXT: lui a3, 8 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v24, a2 +; ZVFHMIN-NEXT: slli a1, a1, 5 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: lui a2, 8 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t +; ZVFHMIN-NEXT: slli a1, a3, 1 +; ZVFHMIN-NEXT: srli a3, a3, 2 +; ZVFHMIN-NEXT: sub a2, a0, a1 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 +; ZVFHMIN-NEXT: sltu a3, a0, a2 +; ZVFHMIN-NEXT: addi a3, a3, -1 +; ZVFHMIN-NEXT: and a2, a3, a2 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: slli a3, a3, 4 +; ZVFHMIN-NEXT: add a3, sp, a3 +; ZVFHMIN-NEXT: addi a3, a3, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: mv a4, a2 -; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v16, v16, a3, v0.t -; ZVFHMIN-NEXT: slli a2, a1, 1 -; ZVFHMIN-NEXT: mv a3, a0 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a0, a2, .LBB283_2 -; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: bltu a0, a1, .LBB283_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 ; 
ZVFHMIN-NEXT: .LBB283_2: -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vmv4r.v v4, v12 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t -; ZVFHMIN-NEXT: addi a3, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: sub a2, a0, a2 -; ZVFHMIN-NEXT: srli a1, a1, 2 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: mv a4, a3 -; ZVFHMIN-NEXT: slli a3, a3, 1 -; ZVFHMIN-NEXT: add a3, a3, a4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: sltu a0, a0, a2 -; ZVFHMIN-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 -; ZVFHMIN-NEXT: addi a0, a0, -1 -; ZVFHMIN-NEXT: and a0, a0, a2 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # 
Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t ; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 ; ZVFHMIN-NEXT: addi sp, sp, 16 @@ -9271,53 +8911,42 @@ define @vfmsub_vf_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 2 -; ZVFHMIN-NEXT: add a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmv8r.v v24, v16 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; ZVFHMIN-NEXT: fmv.x.h a2, fa0 ; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vmset.m v7 +; ZVFHMIN-NEXT: vsetvli a3, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v16, v24, a1 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vxor.vx v16, v16, a1 ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 ; ZVFHMIN-NEXT: sub a4, a0, a1 ; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3 +; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a3 ; ZVFHMIN-NEXT: sltu a3, a0, a4 ; ZVFHMIN-NEXT: addi a3, a3, -1 ; ZVFHMIN-NEXT: and a3, a3, a4 -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv4r.v v8, v24 -; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: csrr 
a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: addi a4, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 4 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a2 +; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a4, a2 ; ZVFHMIN-NEXT: slli a2, a2, 1 @@ -9325,63 +8954,59 @@ define @vfmsub_vf_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v28, v8 +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB284_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB284_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 5 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0 -; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: add a1, a1, a2 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v0, v8, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v0 -; ZVFHMIN-NEXT: vmv8r.v v8, v24 +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 4
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v0
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
 ; ZVFHMIN-NEXT:    slli a0, a0, 3
 ; ZVFHMIN-NEXT:    mv a1, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 2
+; ZVFHMIN-NEXT:    slli a0, a0, 1
 ; ZVFHMIN-NEXT:    add a0, a0, a1
+; ZVFHMIN-NEXT:    add a0, sp, a0
+; ZVFHMIN-NEXT:    addi a0, a0, 16
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
+; ZVFHMIN-NEXT:    addi a0, sp, 16
+; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v0
+; ZVFHMIN-NEXT:    vmv8r.v v8, v16
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 5
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
 ; ZVFHMIN-NEXT:    addi sp, sp, 16
@@ -9406,119 +9031,100 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x
-define <vscale x 32 x half> @vfnmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
- %negb = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 %evl)
- %negc = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 %evl)
- %v = call <vscale x 32 x half> @llvm.vp.fma.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %negb, <vscale x 32 x half> %negc, <vscale x 32 x i1> %m, i32 %evl)
- ret <vscale x 32 x half> %v
-}
-
-define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; ZVFH-LABEL: vfnmadd_vv_nxv32f16_commuted:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vl8re16.v v24, (a0)
-; ZVFH-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; ZVFH-NEXT:    vfnmadd.vv v8, v16, v24, v0.t
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: vfnmadd_vv_nxv32f16_commuted:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
 ; ZVFHMIN-NEXT:    mv a3, a2
 ; ZVFHMIN-NEXT:    slli a2, a2, 2
 ; ZVFHMIN-NEXT:    add a2, a2, a3
-; ZVFHMIN-NEXT:    sub sp, sp, a2
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 
16 + 40 * vlenb +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vl8re16.v v24, (a0) -; ZVFHMIN-NEXT: lui a2, 8 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: mv a3, a1 -; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t -; ZVFHMIN-NEXT: vxor.vx v24, v24, a2, v0.t -; ZVFHMIN-NEXT: slli a2, a0, 1 -; ZVFHMIN-NEXT: vmv4r.v v4, v20 -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: addi a4, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a1, a2, .LBB287_2 +; ZVFHMIN-NEXT: bltu a1, a0, .LBB286_2 ; ZVFHMIN-NEXT: # %bb.1: -; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB287_2: -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 -; ZVFHMIN-NEXT: 
csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: mv a4, a3 -; ZVFHMIN-NEXT: slli a3, a3, 1 -; ZVFHMIN-NEXT: add a3, a3, a4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: addi a3, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: sub a2, a1, a2 -; ZVFHMIN-NEXT: srli a0, a0, 2 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 5 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: sltu a1, a1, a2 -; ZVFHMIN-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a0 -; ZVFHMIN-NEXT: addi a1, a1, -1 -; ZVFHMIN-NEXT: and a1, a1, a2 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: .LBB286_2: +; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: mv a2, a0 -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: add a0, a0, a2 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
+; ZVFHMIN-NEXT:    csrr a0, vlenb
+; ZVFHMIN-NEXT:    slli a0, a0, 4
 ; ZVFHMIN-NEXT:    mv a1, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 2
+; ZVFHMIN-NEXT:    slli a0, a0, 1
 ; ZVFHMIN-NEXT:    add a0, a0, a1
 ; ZVFHMIN-NEXT:    add sp, sp, a0
 ; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
@@ -9802,334 +9316,420 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> %
 ; ZVFHMIN-NEXT:    ret
  %negb = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 %evl)
  %negc = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 %evl)
- %v = call <vscale x 32 x half> @llvm.vp.fma.nxv32f16(<vscale x 32 x half> %negb, <vscale x 32 x half> %va, <vscale x 32 x half> %negc, <vscale x 32 x i1> %m, i32 %evl)
+ %v = call <vscale x 32 x half> @llvm.vp.fma.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %negb, <vscale x 32 x half> %negc, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x half> %v
 }
-define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x half> %c, i32 zeroext %evl) {
-; ZVFH-LABEL: vfnmadd_vv_nxv32f16_unmasked:
+define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfnmadd_vv_nxv32f16_commuted:
 ; ZVFH:       # %bb.0:
 ; ZVFH-NEXT:    vl8re16.v v24, (a0)
 ; ZVFH-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; ZVFH-NEXT:    vfnmadd.vv v8, v16, v24
+; ZVFH-NEXT:    vfnmadd.vv v8, v16, v24, v0.t
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-LABEL: vfnmadd_vv_nxv32f16_unmasked:
+; ZVFHMIN-LABEL: vfnmadd_vv_nxv32f16_commuted:
 ; ZVFHMIN:       # %bb.0:
 ; ZVFHMIN-NEXT:    addi sp, sp, -16
 ; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    csrr a2, vlenb
+; ZVFHMIN-NEXT:    slli a2, a2, 4
 ; ZVFHMIN-NEXT:    mv a3, a2
+; ZVFHMIN-NEXT:    slli a2, a2, 1
+; ZVFHMIN-NEXT:    add a2, a2, a3
+; ZVFHMIN-NEXT:    sub sp, sp, a2
+; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
+; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vmv1r.v v7, v0
+; ZVFHMIN-NEXT:    csrr a2, vlenb
 ; ZVFHMIN-NEXT:    slli a2, a2, 3
-; ZVFHMIN-NEXT:    add a3, a3, a2
-; ZVFHMIN-NEXT:    slli a2, a2, 2
+; ZVFHMIN-NEXT:    mv a3, a2
+; ZVFHMIN-NEXT:    slli a2, a2, 2
 ; ZVFHMIN-NEXT:    add a2, a2, a3
-; ZVFHMIN-NEXT:    sub sp, sp, a2
-; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x29, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 41 * vlenb
+; ZVFHMIN-NEXT:    add a2, sp, a2
+; ZVFHMIN-NEXT:    addi a2, a2, 16
+; ZVFHMIN-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    vl8re16.v v24, (a0)
-; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    mv a2, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 3
-; ZVFHMIN-NEXT:    add a2, a2, a0
-; ZVFHMIN-NEXT:    slli a0, a0, 1
-; ZVFHMIN-NEXT:    add a0, a0, a2
-; ZVFHMIN-NEXT:    add a0, sp, a0
-; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    lui a2, 8
-; ZVFHMIN-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT:    vmset.m v7
 ; ZVFHMIN-NEXT:    csrr a3, vlenb
-; ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v16, v16, a2
+; ZVFHMIN-NEXT:    vxor.vx v8, v16, a2, v0.t
 ; ZVFHMIN-NEXT:    csrr a0, vlenb
-; ZVFHMIN-NEXT:    slli a4, a0, 5
-; ZVFHMIN-NEXT:    add a0, a4, a0
+; ZVFHMIN-NEXT:    slli a0, a0, 5
 ; ZVFHMIN-NEXT:    add a0, sp, a0
 ; ZVFHMIN-NEXT:    addi a0, a0, 16
-; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT:    slli a0, a3, 1
 ; ZVFHMIN-NEXT:    srli a3, a3, 2
 ; ZVFHMIN-NEXT:    sub a4, a1, a0
 ; ZVFHMIN-NEXT:    vsetvli a5, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT:    vslidedown.vx v16, v7, a3
-; ZVFHMIN-NEXT:    csrr a3, vlenb
-; ZVFHMIN-NEXT:    slli a3, a3, 4
-; ZVFHMIN-NEXT:    add a3, sp, a3
-; ZVFHMIN-NEXT:    addi a3, a3, 16
-; ZVFHMIN-NEXT:    vs1r.v v16, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT:    vslidedown.vx v6, v0, a3 
; ZVFHMIN-NEXT: sltu a3, a1, a4 -; ZVFHMIN-NEXT: csrr a5, vlenb -; ZVFHMIN-NEXT: mv a6, a5 -; ZVFHMIN-NEXT: slli a5, a5, 3 -; ZVFHMIN-NEXT: add a6, a6, a5 -; ZVFHMIN-NEXT: slli a5, a5, 1 -; ZVFHMIN-NEXT: add a5, a5, a6 -; ZVFHMIN-NEXT: add a5, sp, a5 -; ZVFHMIN-NEXT: addi a5, a5, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v0, v16, a2 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vxor.vx v24, v24, a2, v0.t ; ZVFHMIN-NEXT: addi a3, a3, -1 ; ZVFHMIN-NEXT: and a3, a3, a4 +; ZVFHMIN-NEXT: vmv1r.v v0, v6 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: mv a4, a2 ; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a4, a4, a2 +; ZVFHMIN-NEXT: mv a4, a2 ; ZVFHMIN-NEXT: slli a2, a2, 1 ; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a4, a2, 4 -; ZVFHMIN-NEXT: add a2, a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a4, a2, 5 -; ZVFHMIN-NEXT: add a2, a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a4, a2, 4 -; ZVFHMIN-NEXT: add a2, a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 2 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a3, a2, 5 -; ZVFHMIN-NEXT: add a2, a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8 -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; 
ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vmv.v.v v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a3, a2, 5 -; ZVFHMIN-NEXT: add a2, a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a1, a0, .LBB288_2 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: bltu a1, a0, .LBB287_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: .LBB288_2: +; ZVFHMIN-NEXT: .LBB287_2: +; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: mv a2, a0 ; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a2, a2, a0 +; ZVFHMIN-NEXT: mv a1, a0 ; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: add a0, a0, a2 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a2, a0, 5 -; ZVFHMIN-NEXT: add a0, a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vmv.v.v v16, v8 +; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a1, a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: 
vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 ; ZVFHMIN-NEXT: ret - %negb = call @llvm.vp.fneg.nxv32f16( %b, splat (i1 true), i32 %evl) - %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 true), i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %va, %negb, %negc, splat (i1 true), i32 %evl) + %negb = call @llvm.vp.fneg.nxv32f16( %b, %m, i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, %m, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %negb, %va, %negc, %m, i32 %evl) ret %v } -define @vfnmadd_vv_nxv32f16_unmasked_commuted( %va, %b, %c, i32 zeroext %evl) { -; ZVFH-LABEL: vfnmadd_vv_nxv32f16_unmasked_commuted: +define @vfnmadd_vv_nxv32f16_unmasked( %va, %b, %c, i32 zeroext %evl) { +; ZVFH-LABEL: vfnmadd_vv_nxv32f16_unmasked: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vl8re16.v v24, (a0) ; ZVFH-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; ZVFH-NEXT: vfnmadd.vv v8, v16, v24 ; ZVFH-NEXT: ret ; -; ZVFHMIN-LABEL: vfnmadd_vv_nxv32f16_unmasked_commuted: +; ZVFHMIN-LABEL: vfnmadd_vv_nxv32f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: sub sp, sp, a2 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a3, a3, a2 -; ZVFHMIN-NEXT: slli a2, a2, 2 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 ; ZVFHMIN-NEXT: add a2, a2, a3 -; ZVFHMIN-NEXT: sub sp, sp, a2 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x29, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 41 * vlenb -; ZVFHMIN-NEXT: vsetvli a2, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmv8r.v v0, v16 -; ZVFHMIN-NEXT: vmv8r.v v16, v8 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vl8re16.v v24, (a0) ; ZVFHMIN-NEXT: lui a2, 8 +; ZVFHMIN-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v8 ; ZVFHMIN-NEXT: csrr a3, vlenb ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v0, v0, a2 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a4, a0, 5 -; ZVFHMIN-NEXT: add a0, a4, a0 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vxor.vx v16, v16, a2 ; ZVFHMIN-NEXT: slli a0, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 ; ZVFHMIN-NEXT: sub a4, a1, a0 ; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v8, v8, a3 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs1r.v v8, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3 ; ZVFHMIN-NEXT: sltu a3, a1, a4 ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v0, v24, a2 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill +; 
ZVFHMIN-NEXT: vxor.vx v8, v24, a2 ; ZVFHMIN-NEXT: addi a3, a3, -1 ; ZVFHMIN-NEXT: and a3, a3, a4 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: mv a4, a2 ; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a4, a4, a2 -; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a4, a2, 5 -; ZVFHMIN-NEXT: add a2, a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a4, a2, 4 -; ZVFHMIN-NEXT: add a2, a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t +; ZVFHMIN-NEXT: bltu a1, a0, .LBB288_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: .LBB288_2: +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 +; ZVFHMIN-NEXT: ret + %negb = 
call @llvm.vp.fneg.nxv32f16( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %va, %negb, %negc, splat (i1 true), i32 %evl) + ret %v +} + +define @vfnmadd_vv_nxv32f16_unmasked_commuted( %va, %b, %c, i32 zeroext %evl) { +; ZVFH-LABEL: vfnmadd_vv_nxv32f16_unmasked_commuted: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vl8re16.v v24, (a0) +; ZVFH-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; ZVFH-NEXT: vfnmadd.vv v8, v16, v24 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfnmadd_vv_nxv32f16_unmasked_commuted: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: sub sp, sp, a2 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a4, a2, 4 -; ZVFHMIN-NEXT: add a2, a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vl8re16.v v24, (a0) +; ZVFHMIN-NEXT: lui a2, 8 +; ZVFHMIN-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v7 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v16, a2 +; ZVFHMIN-NEXT: slli a0, a3, 1 +; ZVFHMIN-NEXT: srli a3, a3, 2 +; ZVFHMIN-NEXT: sub a4, a1, a0 +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3 +; ZVFHMIN-NEXT: sltu a3, a1, a4 +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v24, v24, a2 +; ZVFHMIN-NEXT: addi a3, a3, -1 +; ZVFHMIN-NEXT: and a3, a3, a4 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a3, a2, 5 -; ZVFHMIN-NEXT: add a2, a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv4r.v v16, v8 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a3, a2, 4 -; ZVFHMIN-NEXT: add a2, a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfncvt.f.f.w v28, v8 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; 
ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: bltu a1, a0, .LBB289_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a0 ; ZVFHMIN-NEXT: .LBB289_2: ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: mv a2, a0 -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a2, a2, a0 -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: add a0, a0, a2 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a2, a0, 4 -; ZVFHMIN-NEXT: add a0, a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v0, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v8 -; ZVFHMIN-NEXT: vmv8r.v v8, v24 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: mv a1, a0 ; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a1, a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 ; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 ; ZVFHMIN-NEXT: addi sp, sp, 16 @@ -10159,124 +9759,120 @@ define @vfnmadd_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; ZVFHMIN-NEXT: fmv.x.h a2, fa0 -; ZVFHMIN-NEXT: lui a4, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v7, v0 +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: lui a2, 8 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v24, a2 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 5 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: mv a3, a0 +; ZVFHMIN-NEXT: slli a1, a1, 5 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, 
a1, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a4, v0.t -; ZVFHMIN-NEXT: vxor.vx v16, v16, a4, v0.t -; ZVFHMIN-NEXT: slli a2, a1, 1 -; ZVFHMIN-NEXT: addi a4, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv4r.v v4, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a0, a2, .LBB290_2 -; ZVFHMIN-NEXT: # %bb.1: -; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB290_2: -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: addi a3, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: mv a4, a3 -; ZVFHMIN-NEXT: slli a3, a3, 1 -; ZVFHMIN-NEXT: add a3, a3, a4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4 +; ZVFHMIN-NEXT: vxor.vx v24, v8, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t +; ZVFHMIN-NEXT: slli a1, a3, 1 +; ZVFHMIN-NEXT: srli a3, a3, 2 +; ZVFHMIN-NEXT: sub a2, a0, a1 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 +; ZVFHMIN-NEXT: sltu a3, a0, a2 +; ZVFHMIN-NEXT: addi a3, a3, -1 +; ZVFHMIN-NEXT: and a2, a3, a2 ; ZVFHMIN-NEXT: csrr a3, vlenb ; ZVFHMIN-NEXT: slli a3, a3, 4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: sub a2, a0, a2 -; ZVFHMIN-NEXT: srli a1, a1, 2 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 5 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: sltu a0, a0, a2 -; ZVFHMIN-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, 
v0, a1 -; ZVFHMIN-NEXT: addi a0, a0, -1 -; ZVFHMIN-NEXT: and a0, a0, a2 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: add a1, a1, a2 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: bltu a0, a1, .LBB290_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB290_2: +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w 
v8, v24 -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t +; ZVFHMIN-NEXT: vmv.v.v v16, v8 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -10307,111 +9903,89 @@ define @vfnmadd_vf_nxv32f16_commute( %v ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 5 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; ZVFHMIN-NEXT: fmv.x.h a2, fa0 -; ZVFHMIN-NEXT: lui a4, 8 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v7, v0 +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: lui a2, 8 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v24, a2 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: mv a3, a0 +; ZVFHMIN-NEXT: slli a1, a1, 5 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a4, v0.t -; ZVFHMIN-NEXT: vxor.vx v24, v16, a4, v0.t -; ZVFHMIN-NEXT: slli a2, a1, 1 -; ZVFHMIN-NEXT: vmv4r.v v20, v28 -; ZVFHMIN-NEXT: addi a4, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv4r.v v4, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a0, a2, .LBB291_2 -; ZVFHMIN-NEXT: # %bb.1: -; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB291_2: -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; 
ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: mv a4, a3 -; ZVFHMIN-NEXT: slli a3, a3, 1 -; ZVFHMIN-NEXT: add a3, a3, a4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: addi a3, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 -; ZVFHMIN-NEXT: sub a2, a0, a2 -; ZVFHMIN-NEXT: srli a1, a1, 2 +; ZVFHMIN-NEXT: vxor.vx v24, v8, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t +; ZVFHMIN-NEXT: slli a1, a3, 1 +; ZVFHMIN-NEXT: srli a3, a3, 2 +; ZVFHMIN-NEXT: sub a2, a0, a1 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 +; ZVFHMIN-NEXT: sltu a3, a0, a2 +; ZVFHMIN-NEXT: addi a3, a3, -1 +; ZVFHMIN-NEXT: and a2, a3, a2 ; ZVFHMIN-NEXT: csrr a3, vlenb ; ZVFHMIN-NEXT: slli a3, a3, 4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: addi a3, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: sltu a0, a0, a2 -; ZVFHMIN-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 -; ZVFHMIN-NEXT: addi a0, a0, -1 -; ZVFHMIN-NEXT: and a0, a0, a2 +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: bltu a0, a1, .LBB291_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB291_2: +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 +; 
ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -10420,11 +9994,46 @@ define @vfnmadd_vf_nxv32f16_commute( %v ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vmv.v.v v16, v8 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 ; ZVFHMIN-NEXT: addi sp, sp, 16 @@ -10453,101 +10062,95 @@ define @vfnmadd_vf_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: lui a2, 8 -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmset.m v7 +; ZVFHMIN-NEXT: fmv.x.h a2, fa0 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli a3, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: vmv.v.x v24, a1 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: mv a4, a1 -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: add a1, a1, a4 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; 
ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a2 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vxor.vx v16, v16, a2 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: vxor.vx v16, v16, a1 ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 -; ZVFHMIN-NEXT: sub a2, a0, a1 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3 -; ZVFHMIN-NEXT: sltu a3, a0, a2 +; ZVFHMIN-NEXT: sub a4, a0, a1 +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a3 +; ZVFHMIN-NEXT: sltu a3, a0, a4 ; ZVFHMIN-NEXT: addi a3, a3, -1 -; ZVFHMIN-NEXT: and a2, a3, a2 -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: mv a4, a3 -; ZVFHMIN-NEXT: slli a3, a3, 1 -; ZVFHMIN-NEXT: add a3, a3, a4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t +; ZVFHMIN-NEXT: and a3, a3, a4 +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 4 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a2 ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfncvt.f.f.w v28, v8 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB292_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB292_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: add a1, a1, a2 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0 -; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v0 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v8 -; ZVFHMIN-NEXT: vmv8r.v v8, v24 +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add sp, sp, a0 @@ -10578,100 +10181,97 @@ define @vfnmadd_vf_nxv32f16_unmasked_commute( @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 5 -; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: slli a1, a1, 2 ; ZVFHMIN-NEXT: add a1, a1, a2 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v7, v0 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: fmv.x.h a2, fa0 -; ZVFHMIN-NEXT: lui a4, 8 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-NEXT: mv 
a3, a0 -; ZVFHMIN-NEXT: vxor.vx v8, v8, a4, v0.t -; ZVFHMIN-NEXT: vxor.vx v16, v16, a4, v0.t -; ZVFHMIN-NEXT: slli a2, a1, 1 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv4r.v v4, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: bltu a0, a2, .LBB294_2 -; ZVFHMIN-NEXT: # %bb.1: -; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB294_2: -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t -; ZVFHMIN-NEXT: addi a3, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: lui a2, 8 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 +; ZVFHMIN-NEXT: vmv.v.x v24, a1 +; ZVFHMIN-NEXT: slli a1, a3, 1 +; ZVFHMIN-NEXT: srli a3, a3, 2 +; ZVFHMIN-NEXT: vxor.vx v8, v24, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t +; ZVFHMIN-NEXT: sub a2, a0, a1 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 +; ZVFHMIN-NEXT: sltu a3, a0, a2 +; ZVFHMIN-NEXT: addi a3, a3, -1 +; ZVFHMIN-NEXT: and a2, a3, a2 ; ZVFHMIN-NEXT: csrr a3, vlenb ; ZVFHMIN-NEXT: slli a3, a3, 4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 -; ZVFHMIN-NEXT: sub a2, a0, a2 -; ZVFHMIN-NEXT: srli a1, a1, 2 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: mv a4, a3 -; ZVFHMIN-NEXT: slli a3, a3, 1 -; ZVFHMIN-NEXT: add a3, a3, a4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: sltu a0, a0, a2 -; ZVFHMIN-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 -; ZVFHMIN-NEXT: addi a0, a0, -1 -; ZVFHMIN-NEXT: and a0, a0, a2 +; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # 
Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: bltu a0, a1, .LBB294_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB294_2: +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v 
v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vmv.v.v v16, v8 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 ; ZVFHMIN-NEXT: addi sp, sp, 16 @@ -10839,49 +10460,28 @@ define @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_unmasked( @vfnmadd_vf_nxv32f16_neg_splat_unmasked( @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vv_nxv32f16( %va, @vfnmsub_vv_nxv32f16_commuted( % ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 +; ZVFHMIN-NEXT: sub sp, sp, a2 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v7, v0 +; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 ; ZVFHMIN-NEXT: slli a2, a2, 2 ; ZVFHMIN-NEXT: add a2, a2, a3 -; ZVFHMIN-NEXT: sub sp, sp, a2 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vl8re16.v v24, (a0) +; ZVFHMIN-NEXT: lui a2, 8 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: vxor.vx v8, v16, a2, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: slli a0, a3, 1 +; ZVFHMIN-NEXT: srli a3, a3, 2 +; ZVFHMIN-NEXT: sub a4, a1, a0 +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v6, v0, a3 +; ZVFHMIN-NEXT: sltu a3, a1, a4 +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v24, v24, a2, v0.t +; ZVFHMIN-NEXT: addi a3, a3, -1 +; ZVFHMIN-NEXT: and a3, a3, a4 +; ZVFHMIN-NEXT: vmv1r.v v0, v6 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v 
v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 2 +; ZVFHMIN-NEXT: add a2, a2, a3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vmv.v.v v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vl8re16.v v24, (a0) -; ZVFHMIN-NEXT: lui a2, 8 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: mv a3, a1 -; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t -; ZVFHMIN-NEXT: vxor.vx v24, v24, a2, v0.t -; ZVFHMIN-NEXT: slli a2, a0, 1 -; ZVFHMIN-NEXT: vmv4r.v v4, v20 -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: addi a4, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a1, a2, .LBB299_2 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: bltu a1, a0, .LBB299_2 ; ZVFHMIN-NEXT: # %bb.1: -; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: mv a1, a0 ; ZVFHMIN-NEXT: .LBB299_2: -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: add a3, sp, a3 
-; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: mv a4, a3 -; ZVFHMIN-NEXT: slli a3, a3, 1 -; ZVFHMIN-NEXT: add a3, a3, a4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: addi a3, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: sub a2, a1, a2 -; ZVFHMIN-NEXT: srli a0, a0, 2 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 5 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: sltu a1, a1, a2 -; ZVFHMIN-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a0 -; ZVFHMIN-NEXT: addi a1, a1, -1 -; ZVFHMIN-NEXT: and a1, a1, a2 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: mv a2, a0 +; ZVFHMIN-NEXT: mv a1, a0 ; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: add a0, a0, a2 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vmv.v.v v16, v8 ; ZVFHMIN-NEXT: csrr a0, vlenb ; 
ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: slli a0, a0, 1 ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 @@ -11582,157 +11227,100 @@ define @vfnmsub_vv_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: sub sp, sp, a2 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a3, a3, a2 -; ZVFHMIN-NEXT: slli a2, a2, 2 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 ; ZVFHMIN-NEXT: add a2, a2, a3 -; ZVFHMIN-NEXT: sub sp, sp, a2 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x29, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 41 * vlenb +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vl8re16.v v24, (a0) -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: mv a2, a0 -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a2, a2, a0 -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: add a0, a0, a2 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: lui a2, 8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v7 +; ZVFHMIN-NEXT: vmset.m v8 ; ZVFHMIN-NEXT: csrr a3, vlenb ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; ZVFHMIN-NEXT: vxor.vx v16, v16, a2 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a4, a0, 5 -; ZVFHMIN-NEXT: add a0, a4, a0 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: slli a0, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 ; ZVFHMIN-NEXT: sub a4, a1, a0 ; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v16, v7, a3 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs1r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3 ; ZVFHMIN-NEXT: sltu a3, a1, a4 -; ZVFHMIN-NEXT: csrr a5, vlenb -; ZVFHMIN-NEXT: mv a6, a5 -; ZVFHMIN-NEXT: slli a5, a5, 3 -; ZVFHMIN-NEXT: add a6, a6, a5 -; ZVFHMIN-NEXT: slli a5, a5, 1 -; ZVFHMIN-NEXT: add a5, a5, a6 -; ZVFHMIN-NEXT: add a5, sp, a5 -; ZVFHMIN-NEXT: addi a5, a5, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v0, v16, a2 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vxor.vx v8, v24, a2 ; ZVFHMIN-NEXT: addi a3, a3, -1 ; ZVFHMIN-NEXT: and a3, a3, a4 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: mv a4, a2 ; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a4, a4, a2 -; ZVFHMIN-NEXT: slli a2, a2, 1 -; 
ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a4, a2, 4 -; ZVFHMIN-NEXT: add a2, a4, a2 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a4, a2, 5 -; ZVFHMIN-NEXT: add a2, a4, a2 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4 +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a4, a2, 4 -; ZVFHMIN-NEXT: add a2, a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a3, a2, 5 -; ZVFHMIN-NEXT: add a2, a3, a2 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8 -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a3, a2, 5 -; ZVFHMIN-NEXT: add a2, a3, a2 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: bltu a1, a0, .LBB300_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a0 ; ZVFHMIN-NEXT: .LBB300_2: ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: mv a2, a0 -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a2, a2, a0 -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: add a0, a0, a2 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; 
ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a2, a0, 5 -; ZVFHMIN-NEXT: add a0, a2, a0 +; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: mv a1, a0 ; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a1, a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 ; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 ; ZVFHMIN-NEXT: addi sp, sp, 16 @@ -11757,142 +11345,101 @@ define @vfnmsub_vv_nxv32f16_unmasked_commuted( @vfnmsub_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vmv8r.v v24, v16 ; ZVFHMIN-NEXT: fmv.x.h a2, fa0 -; ZVFHMIN-NEXT: lui a3, 8 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: vmv.v.x v16, a2 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 5 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v16, v8, a3, v0.t -; ZVFHMIN-NEXT: slli a2, a1, 1 -; ZVFHMIN-NEXT: mv a3, a0 -; ZVFHMIN-NEXT: vmv4r.v v12, v20 -; ZVFHMIN-NEXT: addi a4, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: bltu a0, a2, .LBB302_2 -; ZVFHMIN-NEXT: # %bb.1: -; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB302_2: -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: vxor.vx v16, v8, a1, v0.t +; ZVFHMIN-NEXT: slli a1, a3, 1 +; ZVFHMIN-NEXT: srli a3, a3, 2 +; ZVFHMIN-NEXT: sub a4, a0, a1 +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 +; ZVFHMIN-NEXT: sltu a3, a0, a4 +; ZVFHMIN-NEXT: addi a3, a3, -1 +; ZVFHMIN-NEXT: and a3, a3, a4 ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 4 ; ZVFHMIN-NEXT: add a4, 
sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv4r.v v4, v28 +; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 5 +; ZVFHMIN-NEXT: slli a4, a4, 3 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 ; ZVFHMIN-NEXT: mv a5, a4 @@ -11974,73 +11503,87 @@ define @vfnmsub_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v24, a2 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: addi a3, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: mv a4, a3 -; ZVFHMIN-NEXT: slli a3, a3, 1 -; ZVFHMIN-NEXT: add a3, a3, a4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: sub a2, a0, a2 -; ZVFHMIN-NEXT: srli a1, a1, 2 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 5 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: sltu a0, a0, a2 -; ZVFHMIN-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 -; ZVFHMIN-NEXT: addi a0, a0, -1 -; ZVFHMIN-NEXT: and a0, a0, a2 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; 
ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: add a1, a1, a2 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: bltu a0, a1, .LBB302_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; ZVFHMIN-NEXT: .LBB302_2: +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vmv.v.v v16, v8 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -12070,116 +11613,130 @@ define @vfnmsub_vf_nxv32f16_commute( %v ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 5 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv8r.v v24, v16 -; ZVFHMIN-NEXT: fmv.x.h a2, fa0 -; ZVFHMIN-NEXT: lui a3, 8 -; ZVFHMIN-NEXT: csrr a1, vlenb -; 
ZVFHMIN-NEXT: vmv.v.x v16, a2 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: mv a4, a2 -; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a2, a2, a4 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a3, v0.t -; ZVFHMIN-NEXT: slli a2, a1, 1 -; ZVFHMIN-NEXT: mv a3, a0 +; ZVFHMIN-NEXT: vmv1r.v v7, v0 +; ZVFHMIN-NEXT: fmv.x.h a2, fa0 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: slli a1, a3, 1 +; ZVFHMIN-NEXT: srli a3, a3, 2 +; ZVFHMIN-NEXT: sub a4, a0, a1 +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 +; ZVFHMIN-NEXT: sltu a3, a0, a4 +; ZVFHMIN-NEXT: addi a3, a3, -1 +; ZVFHMIN-NEXT: and a3, a3, a4 ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 4 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: bltu a0, a2, .LBB303_2 +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v16, a2 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: bltu a0, a1, .LBB303_2 ; ZVFHMIN-NEXT: # %bb.1: -; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB303_2: -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv4r.v v4, v28 -; ZVFHMIN-NEXT: csrr 
a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t -; ZVFHMIN-NEXT: addi a3, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: sub a2, a0, a2 -; ZVFHMIN-NEXT: srli a1, a1, 2 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: mv a4, a3 -; ZVFHMIN-NEXT: slli a3, a3, 1 -; ZVFHMIN-NEXT: add a3, a3, a4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: sltu a0, a0, a2 -; ZVFHMIN-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 -; ZVFHMIN-NEXT: addi a0, a0, -1 -; ZVFHMIN-NEXT: and a0, a0, a2 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; 
ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 ; ZVFHMIN-NEXT: addi sp, sp, 16 @@ -12204,51 +11761,41 @@ define @vfnmsub_vf_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 2 -; ZVFHMIN-NEXT: add a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; ZVFHMIN-NEXT: fmv.x.h a2, fa0 ; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vsetvli a3, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v7 +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v16, v8, a1 -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 ; ZVFHMIN-NEXT: sub a4, a0, a1 ; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3 +; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a3 ; ZVFHMIN-NEXT: sltu a3, a0, a4 ; ZVFHMIN-NEXT: addi a3, a3, -1 ; ZVFHMIN-NEXT: and a3, a3, a4 +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: addi a4, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 4 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; 
ZVFHMIN-NEXT: vmv.v.x v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv4r.v v8, v24 +; ZVFHMIN-NEXT: vmv.v.x v16, a2 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a4, a2 @@ -12256,64 +11803,59 @@ define @vfnmsub_vf_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v28, v8 +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB304_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB304_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 5 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0 -; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: add a1, a1, a2 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v8 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v0 -; ZVFHMIN-NEXT: vmv8r.v v8, v24 +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, 
a0, 16 +; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: slli a0, a0, 1 ; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 ; ZVFHMIN-NEXT: addi sp, sp, 16 @@ -12338,46 +11880,41 @@ define @vfnmsub_vf_nxv32f16_unmasked_commute( @vfnmsub_vf_nxv32f16_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -12488,113 +12020,110 @@ define @vfnmsub_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: fmv.x.h a2, fa0 -; ZVFHMIN-NEXT: lui a3, 8 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v16, a2 -; ZVFHMIN-NEXT: vxor.vx v24, v16, a3, v0.t -; ZVFHMIN-NEXT: slli a2, a1, 1 -; ZVFHMIN-NEXT: mv a3, a0 -; ZVFHMIN-NEXT: vmv4r.v v20, v28 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 -; ZVFHMIN-NEXT: bltu a0, a2, .LBB306_2 -; ZVFHMIN-NEXT: # %bb.1: -; ZVFHMIN-NEXT: mv a3, a2 -; ZVFHMIN-NEXT: .LBB306_2: -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv4r.v v4, v12 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, 
ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: mv a4, a3 -; ZVFHMIN-NEXT: slli a3, a3, 1 -; ZVFHMIN-NEXT: add a3, a3, a4 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28 -; ZVFHMIN-NEXT: sub a2, a0, a2 -; ZVFHMIN-NEXT: srli a1, a1, 2 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 -; ZVFHMIN-NEXT: addi a3, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: lui a2, 8 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 5 -; ZVFHMIN-NEXT: add a3, sp, a3 -; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vmv.v.x v16, a1 +; ZVFHMIN-NEXT: slli a1, a3, 1 +; ZVFHMIN-NEXT: srli a3, a3, 2 +; ZVFHMIN-NEXT: vxor.vx v8, v16, a2, v0.t +; ZVFHMIN-NEXT: sub a2, a0, a1 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 +; ZVFHMIN-NEXT: sltu a3, a0, a2 +; ZVFHMIN-NEXT: addi a3, a3, -1 +; ZVFHMIN-NEXT: and a2, a3, a2 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 -; ZVFHMIN-NEXT: mv a4, a3 -; ZVFHMIN-NEXT: slli a3, a3, 1 -; ZVFHMIN-NEXT: add a3, a3, a4 +; ZVFHMIN-NEXT: slli a3, a3, 4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: sltu a0, a0, a2 -; ZVFHMIN-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1 -; ZVFHMIN-NEXT: addi a0, a0, -1 -; ZVFHMIN-NEXT: and a0, a0, a2 +; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 5 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: bltu a0, a1, .LBB306_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: mv a0, a1 +; 
ZVFHMIN-NEXT: .LBB306_2: +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: add a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -12630,106 +12159,93 @@ define @vfnmsub_vf_nxv32f16_neg_splat_commute( @vfnmsub_vf_nxv32f16_neg_splat_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfma_vv_nxv1f16_double_neg( %a, < ; ; ZVFHMIN-LABEL: vfma_vv_nxv1f16_double_neg: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %nega = call @llvm.vp.fneg.nxv1f16( %a, %m, i32 %evl) %negb = call @llvm.vp.fneg.nxv1f16( %b, %m, i32 %evl) diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll index 74d289951530a..4523b43274eff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll @@ -18,12 +18,12 @@ define @vfmax_vv_nxv1bf16( %va, @llvm.vp.maxnum.nxv1bf16( %va, %vb, %m, i32 %evl) ret %v @@ -37,7 +37,7 @@ define @vfmax_vv_nxv1bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfmax.vv v9, v9, v10 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.maxnum.nxv1bf16( %va, %vb, splat (i1 true), i32 %evl) @@ -50,12 +50,12 @@ define @vfmax_vv_nxv2bf16( %va, @llvm.vp.maxnum.nxv2bf16( %va, %vb, %m, i32 %evl) ret %v @@ -69,7 +69,7 @@ define @vfmax_vv_nxv2bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmax.vv v9, v9, v10 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.maxnum.nxv2bf16( %va, %vb, splat (i1 true), i32 %evl) @@ -82,12 +82,12 @@ define @vfmax_vv_nxv4bf16( %va, @llvm.vp.maxnum.nxv4bf16( %va, %vb, %m, i32 %evl) ret %v @@ -101,7 +101,7 @@ define @vfmax_vv_nxv4bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmax.vv v10, v12, v10 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.maxnum.nxv4bf16( %va, %vb, splat (i1 true), i32 %evl) @@ -114,12 +114,12 @@ define @vfmax_vv_nxv8bf16( %va, @llvm.vp.maxnum.nxv8bf16( %va, %vb, %m, i32 %evl) ret %v @@ -133,7 +133,7 @@ define @vfmax_vv_nxv8bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmax.vv v12, v16, v12 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vp.maxnum.nxv8bf16( %va, %vb, splat (i1 true), i32 %evl) @@ -146,12 +146,12 @@ define @vfmax_vv_nxv16bf16( %va, @llvm.vp.maxnum.nxv16bf16( %va, %vb, %m, i32 %evl) ret %v @@ -165,7 +165,7 @@ define @vfmax_vv_nxv16bf16_unmasked( @llvm.vp.maxnum.nxv16bf16( %va, %vb, splat (i1 true), i32 %evl) @@ -181,10 +181,18 @@ define @vfmax_vv_nxv32bf16( %va, @vfmax_vv_nxv32bf16( %va, @vfmax_vv_nxv32bf16_unmasked( @vfmax_vv_nxv32bf16_unmasked( @vfmax_vv_nxv1f16( %va, @llvm.vp.maxnum.nxv1f16( %va, %vb, %m, i32 %evl) ret %v @@ -316,7 +352,7 @@ define @vfmax_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.maxnum.nxv1f16( %va, %vb, splat (i1 true), i32 %evl) @@ -335,12 +371,12 @@ define @vfmax_vv_nxv2f16( %va, @llvm.vp.maxnum.nxv2f16( %va, %vb, %m, i32 %evl) ret %v @@ -360,7 +396,7 @@ define @vfmax_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli 
zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.maxnum.nxv2f16( %va, %vb, splat (i1 true), i32 %evl) @@ -379,12 +415,12 @@ define @vfmax_vv_nxv4f16( %va, @llvm.vp.maxnum.nxv4f16( %va, %vb, %m, i32 %evl) ret %v @@ -404,7 +440,7 @@ define @vfmax_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmax.vv v10, v12, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.maxnum.nxv4f16( %va, %vb, splat (i1 true), i32 %evl) @@ -423,12 +459,12 @@ define @vfmax_vv_nxv8f16( %va, @llvm.vp.maxnum.nxv8f16( %va, %vb, %m, i32 %evl) ret %v @@ -448,7 +484,7 @@ define @vfmax_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmax.vv v12, v16, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.maxnum.nxv8f16( %va, %vb, splat (i1 true), i32 %evl) @@ -467,12 +503,12 @@ define @vfmax_vv_nxv16f16( %va, @llvm.vp.maxnum.nxv16f16( %va, %vb, %m, i32 %evl) ret %v @@ -492,7 +528,7 @@ define @vfmax_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmax.vv v16, v24, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.maxnum.nxv16f16( %va, %vb, splat (i1 true), i32 %evl) @@ -514,10 +550,18 @@ define @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a3, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 @@ -606,7 +678,7 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmax.vv v16, v24, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll index 1aaddef39bc1c..a621dc282beb3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll @@ -18,12 +18,12 @@ define @vfmin_vv_nxv1bf16( %va, @llvm.vp.minnum.nxv1bf16( %va, %vb, %m, i32 %evl) ret %v @@ -37,7 +37,7 @@ define @vfmin_vv_nxv1bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfmin.vv v9, v9, v10 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, 
ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.minnum.nxv1bf16( %va, %vb, splat (i1 true), i32 %evl) @@ -50,12 +50,12 @@ define @vfmin_vv_nxv2bf16( %va, @llvm.vp.minnum.nxv2bf16( %va, %vb, %m, i32 %evl) ret %v @@ -69,7 +69,7 @@ define @vfmin_vv_nxv2bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmin.vv v9, v9, v10 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.minnum.nxv2bf16( %va, %vb, splat (i1 true), i32 %evl) @@ -82,12 +82,12 @@ define @vfmin_vv_nxv4bf16( %va, @llvm.vp.minnum.nxv4bf16( %va, %vb, %m, i32 %evl) ret %v @@ -101,7 +101,7 @@ define @vfmin_vv_nxv4bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmin.vv v10, v12, v10 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.minnum.nxv4bf16( %va, %vb, splat (i1 true), i32 %evl) @@ -114,12 +114,12 @@ define @vfmin_vv_nxv8bf16( %va, @llvm.vp.minnum.nxv8bf16( %va, %vb, %m, i32 %evl) ret %v @@ -133,7 +133,7 @@ define @vfmin_vv_nxv8bf16_unmasked( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmin.vv v12, v16, v12 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vp.minnum.nxv8bf16( %va, %vb, splat (i1 true), i32 %evl) @@ -146,12 +146,12 @@ define @vfmin_vv_nxv16bf16( %va, @llvm.vp.minnum.nxv16bf16( %va, %vb, %m, i32 %evl) ret %v @@ -165,7 +165,7 @@ define @vfmin_vv_nxv16bf16_unmasked( @llvm.vp.minnum.nxv16bf16( %va, %vb, splat (i1 true), i32 %evl) @@ -181,10 +181,18 @@ define @vfmin_vv_nxv32bf16( %va, @vfmin_vv_nxv32bf16( %va, @vfmin_vv_nxv32bf16_unmasked( @vfmin_vv_nxv32bf16_unmasked( @vfmin_vv_nxv1f16( %va, @llvm.vp.minnum.nxv1f16( %va, %vb, %m, i32 %evl) ret %v @@ -316,7 +352,7 @@ define @vfmin_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.minnum.nxv1f16( %va, %vb, splat (i1 true), i32 %evl) @@ -335,12 +371,12 @@ define @vfmin_vv_nxv2f16( %va, @llvm.vp.minnum.nxv2f16( %va, %vb, %m, i32 %evl) ret %v @@ -360,7 +396,7 @@ define @vfmin_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.minnum.nxv2f16( %va, %vb, splat (i1 true), i32 %evl) @@ -379,12 +415,12 @@ define @vfmin_vv_nxv4f16( %va, @llvm.vp.minnum.nxv4f16( %va, %vb, %m, i32 %evl) ret %v @@ -404,7 +440,7 @@ define @vfmin_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmin.vv v10, v12, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; 
ZVFHMIN-NEXT: ret %v = call @llvm.vp.minnum.nxv4f16( %va, %vb, splat (i1 true), i32 %evl) @@ -423,12 +459,12 @@ define @vfmin_vv_nxv8f16( %va, @llvm.vp.minnum.nxv8f16( %va, %vb, %m, i32 %evl) ret %v @@ -448,7 +484,7 @@ define @vfmin_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmin.vv v12, v16, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.minnum.nxv8f16( %va, %vb, splat (i1 true), i32 %evl) @@ -467,12 +503,12 @@ define @vfmin_vv_nxv16f16( %va, @llvm.vp.minnum.nxv16f16( %va, %vb, %m, i32 %evl) ret %v @@ -492,7 +528,7 @@ define @vfmin_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmin.vv v16, v24, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.minnum.nxv16f16( %va, %vb, splat (i1 true), i32 %evl) @@ -514,10 +550,18 @@ define @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a3, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 @@ -606,7 +678,7 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmin.vv v16, v24, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll index 06f74dd995748..c1617cd365216 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll @@ -20,12 +20,12 @@ define @vfmul_vv_nxv1f16( %va, @llvm.vp.fmul.nxv1f16( %va, %b, %m, i32 %evl) ret %v @@ -45,7 +45,7 @@ define @vfmul_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fmul.nxv1f16( %va, %b, splat (i1 true), i32 %evl) @@ -64,12 +64,12 @@ define @vfmul_vf_nxv1f16( %va, half %b, < ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v9, v10, v8, v0.t -; ZVFHMIN-NEXT: vsetvli 
a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -93,7 +93,7 @@ define @vfmul_vf_nxv1f16_unmasked( %va, h ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v9, v10, v8 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -114,12 +114,12 @@ define @vfmul_vv_nxv2f16( %va, @llvm.vp.fmul.nxv2f16( %va, %b, %m, i32 %evl) ret %v @@ -139,7 +139,7 @@ define @vfmul_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fmul.nxv2f16( %va, %b, splat (i1 true), i32 %evl) @@ -158,12 +158,12 @@ define @vfmul_vf_nxv2f16( %va, half %b, < ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v9, v10, v8, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -187,7 +187,7 @@ define @vfmul_vf_nxv2f16_unmasked( %va, h ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v9, v10, v8 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -208,12 +208,12 @@ define @vfmul_vv_nxv4f16( %va, @llvm.vp.fmul.nxv4f16( %va, %b, %m, i32 %evl) ret %v @@ -233,7 +233,7 @@ define @vfmul_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v10, v12, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fmul.nxv4f16( %va, %b, splat (i1 true), i32 %evl) @@ -252,12 +252,12 @@ define @vfmul_vf_nxv4f16( %va, half %b, < ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v10, v10, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, 
half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -281,7 +281,7 @@ define @vfmul_vf_nxv4f16_unmasked( %va, h ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v10, v10, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -302,12 +302,12 @@ define @vfmul_vv_nxv8f16( %va, @llvm.vp.fmul.nxv8f16( %va, %b, %m, i32 %evl) ret %v @@ -327,7 +327,7 @@ define @vfmul_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v12, v16, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fmul.nxv8f16( %va, %b, splat (i1 true), i32 %evl) @@ -346,12 +346,12 @@ define @vfmul_vf_nxv8f16( %va, half %b, < ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v12, v12, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -375,7 +375,7 @@ define @vfmul_vf_nxv8f16_unmasked( %va, h ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v12, v12, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -396,12 +396,12 @@ define @vfmul_vv_nxv16f16( %va, @llvm.vp.fmul.nxv16f16( %va, %b, %m, i32 %evl) ret %v @@ -421,7 +421,7 @@ define @vfmul_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v16, v24, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.fmul.nxv16f16( %va, %b, splat (i1 true), i32 %evl) @@ -440,12 +440,12 @@ define @vfmul_vf_nxv16f16( %va, half %b ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v12, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -469,7 +469,7 @@ define @vfmul_vf_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, 
zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -493,10 +493,18 @@ define @vfmul_vv_nxv32f16( %va, @vfmul_vv_nxv32f16( %va, @vfmul_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a3, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB21_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 @@ -585,7 +621,7 @@ define @vfmul_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v16, v24, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 @@ -610,76 +646,76 @@ define @vfmul_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a2, a1, 4 -; ZVFHMIN-NEXT: add a1, a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv8r.v v24, v8 +; ZVFHMIN-NEXT: vmv1r.v v7, v0 +; ZVFHMIN-NEXT: vmv8r.v v16, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: vmv.v.x v16, a1 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a3, a1, 3 -; ZVFHMIN-NEXT: add a1, a3, a1 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv.v.x v8, a1 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: sub a3, a0, a1 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: addi a2, a2, -1 ; ZVFHMIN-NEXT: and a2, a2, a3 -; ZVFHMIN-NEXT: addi a3, sp, 16 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: slli a3, a3, 3 +; ZVFHMIN-NEXT: add a3, sp, a3 +; ZVFHMIN-NEXT: addi a3, a3, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vmv4r.v v8, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a3, a2, 3 -; 
ZVFHMIN-NEXT: add a2, a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmul.vv v16, v8, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfmul.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB22_2: -; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a1, a0, 3 -; ZVFHMIN-NEXT: add a0, a1, a0 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t +; ZVFHMIN-NEXT: vmv8r.v v24, v16 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: vfmul.vv v24, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a1, a0, 4 -; ZVFHMIN-NEXT: add a0, a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 ; ZVFHMIN-NEXT: addi sp, sp, 16 @@ -703,19 +739,14 @@ define @vfmul_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vmv.v.x v16, a1 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, 
a2, 2 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -724,41 +755,30 @@ define @vfmul_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: addi a2, a2, -1 ; ZVFHMIN-NEXT: and a2, a2, a3 -; ZVFHMIN-NEXT: vmv8r.v v16, v8 ; ZVFHMIN-NEXT: addi a3, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmul.vv v16, v8, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB23_2: -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 ; ZVFHMIN-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll index 6193fdd38a642..4336b27eb134a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll @@ -18,11 +18,11 @@ define @vfsqrt_vv_nxv1bf16( %va, @llvm.vp.sqrt.nxv1bf16( %va, %m, i32 %evl) ret %v @@ -35,7 +35,7 @@ define @vfsqrt_vv_nxv1bf16_unmasked( ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfsqrt.v v9, v9 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.sqrt.nxv1bf16( %va, splat (i1 true), i32 %evl) @@ -48,11 +48,11 @@ define @vfsqrt_vv_nxv2bf16( %va, @llvm.vp.sqrt.nxv2bf16( %va, %m, i32 %evl) ret %v @@ -65,7 +65,7 @@ define @vfsqrt_vv_nxv2bf16_unmasked( ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfsqrt.v v9, v9 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.sqrt.nxv2bf16( %va, splat (i1 true), i32 %evl) @@ -78,11 +78,11 @@ define @vfsqrt_vv_nxv4bf16( %va, @llvm.vp.sqrt.nxv4bf16( %va, %m, i32 %evl) ret %v @@ -95,7 +95,7 @@ define @vfsqrt_vv_nxv4bf16_unmasked( ; CHECK-NEXT: 
vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfsqrt.v v10, v10 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.sqrt.nxv4bf16( %va, splat (i1 true), i32 %evl) @@ -108,11 +108,11 @@ define @vfsqrt_vv_nxv8bf16( %va, @llvm.vp.sqrt.nxv8bf16( %va, %m, i32 %evl) ret %v @@ -125,7 +125,7 @@ define @vfsqrt_vv_nxv8bf16_unmasked( ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfsqrt.v v12, v12 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vp.sqrt.nxv8bf16( %va, splat (i1 true), i32 %evl) @@ -138,11 +138,11 @@ define @vfsqrt_vv_nxv16bf16( %va, < ; CHECK-LABEL: vfsqrt_vv_nxv16bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v16, v16, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.sqrt.nxv16bf16( %va, %m, i32 %evl) ret %v @@ -155,7 +155,7 @@ define @vfsqrt_vv_nxv16bf16_unmasked( @llvm.vp.sqrt.nxv16bf16( %va, splat (i1 true), i32 %evl) @@ -175,25 +175,25 @@ define @vfsqrt_vv_nxv32bf16( %va, < ; CHECK-NEXT: sub a3, a0, a1 ; CHECK-NEXT: sltu a4, a0, a3 ; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v24, v24, v0.t -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB10_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB10_2: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8 ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfsqrt.v v16, v24, v0.t -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 +; CHECK-NEXT: vfsqrt.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.sqrt.nxv32bf16( %va, %m, i32 %evl) ret %v @@ -210,15 +210,15 @@ define @vfsqrt_vv_nxv32bf16_unmasked( @vfsqrt_vv_nxv32bf16_unmasked( @llvm.vp.sqrt.nxv32bf16( %va, splat (i1 true), i32 %evl) @@ -245,11 +245,11 @@ define @vfsqrt_vv_nxv1f16( %va, @llvm.vp.sqrt.nxv1f16( %va, %m, i32 %evl) ret %v @@ -268,7 +268,7 @@ define @vfsqrt_vv_nxv1f16_unmasked( %va, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfsqrt.v v9, v9 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.sqrt.nxv1f16( %va, 
splat (i1 true), i32 %evl) @@ -287,11 +287,11 @@ define @vfsqrt_vv_nxv2f16( %va, @llvm.vp.sqrt.nxv2f16( %va, %m, i32 %evl) ret %v @@ -310,7 +310,7 @@ define @vfsqrt_vv_nxv2f16_unmasked( %va, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfsqrt.v v9, v9 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.sqrt.nxv2f16( %va, splat (i1 true), i32 %evl) @@ -329,11 +329,11 @@ define @vfsqrt_vv_nxv4f16( %va, @llvm.vp.sqrt.nxv4f16( %va, %m, i32 %evl) ret %v @@ -352,7 +352,7 @@ define @vfsqrt_vv_nxv4f16_unmasked( %va, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsqrt.v v10, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.sqrt.nxv4f16( %va, splat (i1 true), i32 %evl) @@ -371,11 +371,11 @@ define @vfsqrt_vv_nxv8f16( %va, @llvm.vp.sqrt.nxv8f16( %va, %m, i32 %evl) ret %v @@ -394,7 +394,7 @@ define @vfsqrt_vv_nxv8f16_unmasked( %va, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfsqrt.v v12, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.sqrt.nxv8f16( %va, splat (i1 true), i32 %evl) @@ -413,11 +413,11 @@ define @vfsqrt_vv_nxv16f16( %va, @llvm.vp.sqrt.nxv16f16( %va, %m, i32 %evl) ret %v @@ -436,7 +436,7 @@ define @vfsqrt_vv_nxv16f16_unmasked( %v ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfsqrt.v v16, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.sqrt.nxv16f16( %va, splat (i1 true), i32 %evl) @@ -462,25 +462,25 @@ define @vfsqrt_vv_nxv32f16( %va, @llvm.vp.sqrt.nxv32f16( %va, %m, i32 %evl) ret %v @@ -503,15 +503,15 @@ define @vfsqrt_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: sub a3, a0, a1 ; ZVFHMIN-NEXT: sltu a4, a0, a3 ; ZVFHMIN-NEXT: addi a4, a4, -1 -; ZVFHMIN-NEXT: and a3, a4, a3 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 +; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfsqrt.v v16, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 @@ -520,7 +520,7 @@ define @vfsqrt_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfsqrt.v v16, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.sqrt.nxv32f16( %va, splat (i1 true), i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
index 08664fd1c4819..059408a1c9c3f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
@@ -18,12 +18,12 @@ define @vfsub_vv_nxv1bf16( %va,
 @llvm.vp.fsub.nxv1bf16( %va, %b, %m, i32 %evl)
 ret %v
@@ -37,7 +37,7 @@ define @vfsub_vv_nxv1bf16_unmasked( %
 ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT: vfsub.vv v9, v9, v10
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT: ret
 %v = call @llvm.vp.fsub.nxv1bf16( %va, %b, splat (i1 true), i32 %evl)
@@ -50,12 +50,12 @@ define @vfsub_vf_nxv1bf16( %va, bfloa
 ; CHECK-NEXT: fmv.x.h a1, fa0
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT: vfsub.vv v9, v10, v8, v0.t
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, bfloat %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
@@ -73,7 +73,7 @@ define @vfsub_vf_nxv1bf16_unmasked( %
 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT: vfsub.vv v9, v10, v8
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, bfloat %b, i32 0
@@ -88,12 +88,12 @@ define @vfsub_vv_nxv2bf16( %va,
 @llvm.vp.fsub.nxv2bf16( %va, %b, %m, i32 %evl)
 ret %v
@@ -107,7 +107,7 @@ define @vfsub_vv_nxv2bf16_unmasked( %
 ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT: vfsub.vv v9, v9, v10
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT: ret
 %v = call @llvm.vp.fsub.nxv2bf16( %va, %b, splat (i1 true), i32 %evl)
@@ -120,12 +120,12 @@ define @vfsub_vf_nxv2bf16( %va, bfloa
 ; CHECK-NEXT: fmv.x.h a1, fa0
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT: vfsub.vv v9, v10, v8, v0.t
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, bfloat %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
@@ -143,7 +143,7 @@ define @vfsub_vf_nxv2bf16_unmasked( %
 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT: vfsub.vv v9, v10, v8
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, bfloat %b, i32 0
@@ -158,12 +158,12 @@ define @vfsub_vv_nxv4bf16( %va,
 @llvm.vp.fsub.nxv4bf16( %va, %b, %m, i32 %evl)
 ret %v
@@ -177,7 +177,7 @@ define @vfsub_vv_nxv4bf16_unmasked( %
 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT: vfsub.vv v10, v12, v10
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
 ; CHECK-NEXT: ret
 %v = call @llvm.vp.fsub.nxv4bf16( %va, %b, splat (i1 true), i32 %evl)
@@ -190,12 +190,12 @@ define @vfsub_vf_nxv4bf16( %va, bfloa
 ; CHECK-NEXT: fmv.x.h a1, fa0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT: vfsub.vv v10, v10, v12, v0.t
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, bfloat %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
@@ -213,7 +213,7 @@ define @vfsub_vf_nxv4bf16_unmasked( %
 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT: vfsub.vv v10, v10, v12
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, bfloat %b, i32 0
@@ -228,12 +228,12 @@ define @vfsub_vv_nxv8bf16( %va,
 @llvm.vp.fsub.nxv8bf16( %va, %b, %m, i32 %evl)
 ret %v
@@ -247,7 +247,7 @@ define @vfsub_vv_nxv8bf16_unmasked( %
 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT: vfsub.vv v12, v16, v12
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
 ; CHECK-NEXT: ret
 %v = call @llvm.vp.fsub.nxv8bf16( %va, %b, splat (i1 true), i32 %evl)
@@ -260,12 +260,12 @@ define @vfsub_vf_nxv8bf16( %va, bfloa
 ; CHECK-NEXT: fmv.x.h a1, fa0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vmv.v.x v10, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT: vfsub.vv v12, v12, v16, v0.t
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, bfloat %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
@@ -283,7 +283,7 @@ define @vfsub_vf_nxv8bf16_unmasked( %
 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT: vfsub.vv v12, v12, v16
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, bfloat %b, i32 0
@@ -298,12 +298,12 @@ define @vfsub_vv_nxv16bf16( %va,
 @llvm.vp.fsub.nxv16bf16( %va, %b, %m, i32 %evl)
 ret %v
@@ -317,7 +317,7 @@ define @vfsub_vv_nxv16bf16_unmasked(
 @llvm.vp.fsub.nxv16bf16( %va, %b, splat (i1 true), i32 %evl)
@@ -330,12 +330,12 @@ define @vfsub_vf_nxv16bf16( %va, bf
 ; CHECK-NEXT: fmv.x.h a1, fa0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT: vmv.v.x v12, a1
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT: vfsub.vv v16, v16, v24, v0.t
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, bfloat %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
@@ -353,7 +353,7 @@ define @vfsub_vf_nxv16bf16_unmasked(
 poison, bfloat %b, i32 0
@@ -371,10 +371,18 @@ define @vfsub_vv_nxv32bf16( %va,
@vfsub_vv_nxv32bf16( %va,
@vfsub_vv_nxv32bf16_unmasked(
@vfsub_vv_nxv32bf16_unmasked(
@vfsub_vf_nxv32bf16( %va, bf
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a2, a1, 4
-; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
 ; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT: vmv8r.v v24, v8
+; CHECK-NEXT: vmv1r.v v7, v0
+; CHECK-NEXT: vmv8r.v v16, v8
 ; CHECK-NEXT: fmv.x.h a1, fa0
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a3, a1, 3
-; CHECK-NEXT: add a1, a3, a1
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv.v.x v8, a1
 ; CHECK-NEXT: slli a1, a2, 1
 ; CHECK-NEXT: srli a2, a2, 2
 ; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: slli a4, a4, 3
-; CHECK-NEXT: add a4, sp, a4
-; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v0, a2
 ; CHECK-NEXT: sltu a2, a0, a3
 ; CHECK-NEXT: addi a2, a2, -1
 ; CHECK-NEXT: and a2, a2, a3
-; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
 ; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t
+; CHECK-NEXT: vmv4r.v v8, v16
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a3, a2, 3
-; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: slli a2, a2, 4
 ; CHECK-NEXT: add a2, sp, a2
 ; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfsub.vv v16, v8, v16, v0.t
-; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: vfsub.vv v24, v8, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t
 ; CHECK-NEXT: bltu a0, a1, .LBB22_2
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: mv a0, a1
 ; CHECK-NEXT: .LBB22_2:
-; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
 ; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a1, a0, 3
-; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
 ; CHECK-NEXT: add a0, sp, a0
 ; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24, v0.t
+; CHECK-NEXT: vmv8r.v v24, v16
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfsub.vv v16, v16, v24, v0.t
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vfsub.vv v24, v16, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a1, a0, 4
-; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: .cfi_def_cfa sp, 16
 ; CHECK-NEXT: addi sp, sp, 16
@@ -563,19 +599,14 @@ define @vfsub_vf_nxv32bf16_unmasked(
@vfsub_vf_nxv32bf16_unmasked(
@vfsub_vv_nxv1f16( %va,
 @llvm.vp.fsub.nxv1f16( %va, %b, %m, i32 %evl)
 ret %v
@@ -666,7 +686,7 @@ define @vfsub_vv_nxv1f16_unmasked( %va, <
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
 %v = call @llvm.vp.fsub.nxv1f16( %va, %b, splat (i1 true), i32 %evl)
@@ -685,12 +705,12 @@ define @vfsub_vf_nxv1f16( %va, half %b, <
 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfsub.vv v9, v10, v8, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
@@ -714,7 +734,7 @@ define @vfsub_vf_nxv1f16_unmasked( %va, h
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfsub.vv v9, v10, v8
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
@@ -735,12 +755,12 @@ define @vfsub_vv_nxv2f16( %va,
 @llvm.vp.fsub.nxv2f16( %va, %b, %m, i32 %evl)
 ret %v
@@ -760,7 +780,7 @@ define @vfsub_vv_nxv2f16_unmasked( %va, <
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
 ; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
 %v = call @llvm.vp.fsub.nxv2f16( %va, %b, splat (i1 true), i32 %evl)
@@ -779,12 +799,12 @@ define @vfsub_vf_nxv2f16( %va, half %b, <
 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
 ; ZVFHMIN-NEXT: vfsub.vv v9, v10, v8, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
@@ -808,7 +828,7 @@ define @vfsub_vf_nxv2f16_unmasked( %va, h
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
 ; ZVFHMIN-NEXT: vfsub.vv v9, v10, v8
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
@@ -829,12 +849,12 @@ define @vfsub_vv_nxv4f16( %va,
 @llvm.vp.fsub.nxv4f16( %va, %b, %m, i32 %evl)
 ret %v
@@ -854,7 +874,7 @@ define @vfsub_vv_nxv4f16_unmasked( %va, <
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT: vfsub.vv v10, v12, v10
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT: ret
 %v = call @llvm.vp.fsub.nxv4f16( %va, %b, splat (i1 true), i32 %evl)
@@ -873,12 +893,12 @@ define @vfsub_vf_nxv4f16( %va, half %b, <
 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT: vfsub.vv v10, v10, v12, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
@@ -902,7 +922,7 @@ define @vfsub_vf_nxv4f16_unmasked( %va, h
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT: vfsub.vv v10, v10, v12
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
@@ -923,12 +943,12 @@ define @vfsub_vv_nxv8f16( %va,
 @llvm.vp.fsub.nxv8f16( %va, %b, %m, i32 %evl)
 ret %v
@@ -948,7 +968,7 @@ define @vfsub_vv_nxv8f16_unmasked( %va, <
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT: vfsub.vv v12, v16, v12
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT: ret
 %v = call @llvm.vp.fsub.nxv8f16( %va, %b, splat (i1 true), i32 %evl)
@@ -967,12 +987,12 @@ define @vfsub_vf_nxv8f16( %va, half %b, <
 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT: vmv.v.x v10, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT: vfsub.vv v12, v12, v16, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
@@ -996,7 +1016,7 @@ define @vfsub_vf_nxv8f16_unmasked( %va, h
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT: vfsub.vv v12, v12, v16
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
@@ -1017,12 +1037,12 @@ define @vfsub_vv_nxv16f16( %va,
 @llvm.vp.fsub.nxv16f16( %va, %b, %m, i32 %evl)
 ret %v
@@ -1042,7 +1062,7 @@ define @vfsub_vv_nxv16f16_unmasked( %va
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT: vfsub.vv v16, v24, v16
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT: ret
 %v = call @llvm.vp.fsub.nxv16f16( %va, %b, splat (i1 true), i32 %evl)
@@ -1061,12 +1081,12 @@ define @vfsub_vf_nxv16f16( %va, half %b
 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
 ; ZVFHMIN-NEXT: vmv.v.x v12, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
@@ -1090,7 +1110,7 @@ define @vfsub_vf_nxv16f16_unmasked( %va
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
@@ -1114,10 +1134,18 @@ define @vfsub_vv_nxv32f16( %va,
@vfsub_vv_nxv32f16( %va,
@vfsub_vv_nxv32f16_unmasked( %va
 ; ZVFHMIN-NEXT: addi a3, sp, 16
 ; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t
 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB45_2
 ; ZVFHMIN-NEXT: # %bb.1:
 ; ZVFHMIN-NEXT: mv a0, a1
@@ -1206,7 +1262,7 @@ define @vfsub_vv_nxv32f16_unmasked( %va
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT: vfsub.vv v16, v24, v16
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT: csrr a0, vlenb
 ; ZVFHMIN-NEXT: slli a0, a0, 3
@@ -1231,76 +1287,76 @@ define @vfsub_vf_nxv32f16( %va, half %b
 ; ZVFHMIN-NEXT: addi sp, sp, -16
 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a2, a1, 4
-; ZVFHMIN-NEXT: add a1, a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
 ; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vmv8r.v v24, v8
+; ZVFHMIN-NEXT: vmv1r.v v7, v0
+; ZVFHMIN-NEXT: vmv8r.v v16, v8
 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
-; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a3, a1, 3
-; ZVFHMIN-NEXT: add a1, a3, a1
-; ZVFHMIN-NEXT: add a1, sp, a1
-; ZVFHMIN-NEXT: addi a1, a1, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv.v.x v8, a1
 ; ZVFHMIN-NEXT: slli a1, a2, 1
 ; ZVFHMIN-NEXT: srli a2, a2, 2
 ; ZVFHMIN-NEXT: sub a3, a0, a1
-; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: slli a4, a4, 3
-; ZVFHMIN-NEXT: add a4, sp, a4
-; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
 ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2
 ; ZVFHMIN-NEXT: sltu a2, a0, a3
 ; ZVFHMIN-NEXT: addi a2, a2, -1
 ; ZVFHMIN-NEXT: and a2, a2, a3
-; ZVFHMIN-NEXT: addi a3, sp, 16
+; ZVFHMIN-NEXT: csrr a3, vlenb
+; ZVFHMIN-NEXT: slli a3, a3, 3
+; ZVFHMIN-NEXT: add a3, sp, a3
+; ZVFHMIN-NEXT: addi a3, a3, 16
 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT: vmv4r.v v8, v16
 ; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a3, a2, 3
-; ZVFHMIN-NEXT: add a2, a3, a2
+; ZVFHMIN-NEXT: slli a2, a2, 4
 ; ZVFHMIN-NEXT: add a2, sp, a2
 ; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
+; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v16, v8, v16, v0.t
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfsub.vv v24, v8, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t
 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB46_2
 ; ZVFHMIN-NEXT: # %bb.1:
 ; ZVFHMIN-NEXT: mv a0, a1
 ; ZVFHMIN-NEXT: .LBB46_2:
-; ZVFHMIN-NEXT: addi a1, sp, 16
+; ZVFHMIN-NEXT: vmv1r.v v0, v7
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: add a1, sp, a1
+; ZVFHMIN-NEXT: addi a1, a1, 16
 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a1, a0, 3
-; ZVFHMIN-NEXT: add a0, a1, a0
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT: csrr a0, vlenb
 ; ZVFHMIN-NEXT: slli a0, a0, 3
 ; ZVFHMIN-NEXT: add a0, sp, a0
 ; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t
+; ZVFHMIN-NEXT: vmv8r.v v24, v16
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfsub.vv v24, v16, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t
 ; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a1, a0, 4
-; ZVFHMIN-NEXT: add a0, a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a1
 ; ZVFHMIN-NEXT: add sp, sp, a0
 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
 ; ZVFHMIN-NEXT: addi sp, sp, 16
@@ -1324,19 +1380,14 @@ define @vfsub_vf_nxv32f16_unmasked( %va
 ; ZVFHMIN-NEXT: addi sp, sp, -16
 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: slli a1, a1, 3
 ; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT: csrr a2, vlenb
 ; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT: vmset.m v24
 ; ZVFHMIN-NEXT: vmv.v.x v16, a1
-; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 3
-; ZVFHMIN-NEXT: add a1, sp, a1
-; ZVFHMIN-NEXT: addi a1, a1, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT: slli a1, a2, 1
 ; ZVFHMIN-NEXT: srli a2, a2, 2
 ; ZVFHMIN-NEXT: sub a3, a0, a1
@@ -1345,41 +1396,30 @@ define @vfsub_vf_nxv32f16_unmasked( %va
 ; ZVFHMIN-NEXT: sltu a2, a0, a3
 ; ZVFHMIN-NEXT: addi a2, a2, -1
 ; ZVFHMIN-NEXT: and a2, a2, a3
-; ZVFHMIN-NEXT: vmv8r.v v16, v8
 ; ZVFHMIN-NEXT: addi a3, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
-; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a2, a2, 3
-; ZVFHMIN-NEXT: add a2, sp, a2
-; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsub.vv v16, v8, v16, v0.t
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t
 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB47_2
 ; ZVFHMIN-NEXT: # %bb.1:
 ; ZVFHMIN-NEXT: mv a0, a1
 ; ZVFHMIN-NEXT: .LBB47_2:
-; ZVFHMIN-NEXT: addi a1, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: addi a0, sp, 16
 ; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
 ; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: slli a0, a0, 3
 ; ZVFHMIN-NEXT: add sp, sp, a0
 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
 ; ZVFHMIN-NEXT: addi sp, sp, 16