diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 9a0938bc38dd4..2ebf4c6d7f04e 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -979,6 +979,41 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VMSOF_M: case RISCV::VIOTA_M: case RISCV::VID_V: + // Vector Single-Width Floating-Point Add/Subtract Instructions + case RISCV::VFADD_VF: + case RISCV::VFADD_VV: + case RISCV::VFSUB_VF: + case RISCV::VFSUB_VV: + case RISCV::VFRSUB_VF: + // Vector Widening Floating-Point Add/Subtract Instructions + case RISCV::VFWADD_VV: + case RISCV::VFWADD_VF: + case RISCV::VFWSUB_VV: + case RISCV::VFWSUB_VF: + case RISCV::VFWADD_WF: + case RISCV::VFWADD_WV: + case RISCV::VFWSUB_WF: + case RISCV::VFWSUB_WV: + // Vector Single-Width Floating-Point Multiply/Divide Instructions + case RISCV::VFMUL_VF: + case RISCV::VFMUL_VV: + case RISCV::VFDIV_VF: + case RISCV::VFDIV_VV: + case RISCV::VFRDIV_VF: + // Vector Widening Floating-Point Multiply + case RISCV::VFWMUL_VF: + case RISCV::VFWMUL_VV: + // Vector Floating-Point Compare Instructions + case RISCV::VMFEQ_VF: + case RISCV::VMFEQ_VV: + case RISCV::VMFNE_VF: + case RISCV::VMFNE_VV: + case RISCV::VMFLT_VF: + case RISCV::VMFLT_VV: + case RISCV::VMFLE_VF: + case RISCV::VMFLE_VV: + case RISCV::VMFGT_VF: + case RISCV::VMFGE_VF: // Single-Width Floating-Point/Integer Type-Convert Instructions case RISCV::VFCVT_XU_F_V: case RISCV::VFCVT_X_F_V: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index 66952cac8e00d..b8710a518287a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -93,12 +93,11 @@ define void @fadd_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -229,12 +228,11 @@ define void @fsub_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -365,12 +363,11 @@ define void @fmul_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -501,12 +498,11 @@ define void @fdiv_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -2330,13 +2326,12 @@ define void @fadd_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v8, v10, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -2472,13 +2467,12 @@ define void @fadd_fv_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -2614,13 +2608,12 @@ define void @fsub_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v8, v10, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -2756,13 +2749,12 @@ define void @fsub_fv_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -2898,13 +2890,12 @@ define void @fmul_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v10, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -3040,13 +3031,12 @@ define void @fmul_fv_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -3182,13 +3172,12 @@ define void @fdiv_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v8, v10, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -3324,13 +3313,12 @@ define void @fdiv_fv_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -4999,7 +4987,6 @@ define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) ; ZVFHMIN-NEXT: vle16.v v10, (a2) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -5010,7 +4997,7 @@ define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -5176,7 +5163,6 @@ define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) ; ZVFHMIN-NEXT: vle16.v v10, (a2) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -5187,7 +5173,7 @@ define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index 55a50a15c788c..46fbba35c35a2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -2925,3 +2925,613 @@ define @vid.v( %c, iXLen %vl) { %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %c, iXLen %vl) ret %2 } + +define @vfadd_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfadd_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfadd_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v8, v10 +; VLOPT-NEXT: vfadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %b, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfadd_vx( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfadd_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfadd_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfadd.vf v10, v8, fa0 +; VLOPT-NEXT: vfadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfsub_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfsub_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfsub.vv v8, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfsub_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfsub.vv v8, v8, v10 +; VLOPT-NEXT: vfadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfsub.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %b, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfsub_vx( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfsub_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfsub.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfsub_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfsub.vf v10, v8, fa0 +; VLOPT-NEXT: vfadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfsub.nxv4f32.nxv4f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfrsub_vx( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfrsub_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfrsub.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfrsub_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfrsub.vf v10, v8, fa0 +; VLOPT-NEXT: vfadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfrsub.nxv4f32.nxv4f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwadd_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwadd_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwadd.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwadd_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwadd.vv v12, v8, v10 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwadd.nxv4f64.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwadd_vf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwadd_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwadd.vf v12, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwadd_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwadd.vf v12, v8, fa0 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwsub_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwsub_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwsub.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwsub_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwsub.vv v12, v8, v10 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwsub.nxv4f64.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwsub_vx( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwsub_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwsub.vf v12, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwsub_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwsub.vf v12, v8, fa0 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwadd_wv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwadd_wv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwadd.wv v8, v8, v12 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwadd_wv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwadd.wv v8, v8, v12 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v8, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwadd_wf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwadd_wf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwadd.wf v8, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwadd_wf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwadd.wf v8, v8, fa0 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v8, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32.f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwsub_wv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwsub_wv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwsub.wv v8, v8, v12 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwsub_wv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwsub.wv v8, v8, v12 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v8, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwsub_wf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwsub_wf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwsub.wf v8, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwsub_wf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwsub.wf v8, v8, fa0 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v8, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32.f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfmul_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfmul_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfmul.vv v8, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfmul_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfmul.vv v8, v8, v10 +; VLOPT-NEXT: vfadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfmul.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %b, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfmul_vf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfmul_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfmul.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfmul_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfmul.vf v10, v8, fa0 +; VLOPT-NEXT: vfadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfmul.nxv4f32.nxv4f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfdiv_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfdiv_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfdiv.vv v8, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfdiv_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfdiv.vv v8, v8, v10 +; VLOPT-NEXT: vfadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfdiv.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %b, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfdiv_vf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfdiv_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfdiv.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfdiv_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfdiv.vf v10, v8, fa0 +; VLOPT-NEXT: vfadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfdiv.nxv4f32.nxv4f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfrdiv_vf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfrdiv_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfrdiv.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfrdiv_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfrdiv.vf v10, v8, fa0 +; VLOPT-NEXT: vfadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfrdiv.nxv4f32.nxv4f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwmul_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwmul_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwmul.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwmul_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwmul.vv v12, v8, v10 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwmul.nxv4f64.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwmul_vf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwmul_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwmul.vf v12, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwmul_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwmul.vf v12, v8, fa0 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vmfeq_vf( %a, %b, float%c, iXLen %vl) { +; NOVLOPT-LABEL: vmfeq_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfeq.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v10, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfeq_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfeq.vf v10, v8, fa0 +; VLOPT-NEXT: vmand.mm v0, v10, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfeq.nxv4f32.f32( %a, float %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfeq_vv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmfeq_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfeq.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v12, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfeq_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfeq.vv v12, v8, v10 +; VLOPT-NEXT: vmand.mm v0, v12, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfeq.nxv4f32.nxv4f32( %a, %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfne_vf( %a, %b, float%c, iXLen %vl) { +; NOVLOPT-LABEL: vmfne_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfne.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v10, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfne_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfne.vf v10, v8, fa0 +; VLOPT-NEXT: vmand.mm v0, v10, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfne.nxv4f32.f32( %a, float %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfne_vv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmfne_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfne.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v12, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfne_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfne.vv v12, v8, v10 +; VLOPT-NEXT: vmand.mm v0, v12, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfne.nxv4f32.nxv4f32( %a, %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmflt_vf( %a, %b, float%c, iXLen %vl) { +; NOVLOPT-LABEL: vmflt_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmflt.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v10, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmflt_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmflt.vf v10, v8, fa0 +; VLOPT-NEXT: vmand.mm v0, v10, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmflt.nxv4f32.f32( %a, float %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmflt_vv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmflt_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmflt.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v12, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmflt_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmflt.vv v12, v8, v10 +; VLOPT-NEXT: vmand.mm v0, v12, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmflt.nxv4f32.nxv4f32( %a, %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfle_vf( %a, %b, float%c, iXLen %vl) { +; NOVLOPT-LABEL: vmfle_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfle.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v10, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfle_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfle.vf v10, v8, fa0 +; VLOPT-NEXT: vmand.mm v0, v10, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfle.nxv4f32.f32( %a, float %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfle_vv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmfle_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfle.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v12, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfle_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfle.vv v12, v8, v10 +; VLOPT-NEXT: vmand.mm v0, v12, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfle.nxv4f32.nxv4f32( %a, %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfgt_vf( %a, %b, float%c, iXLen %vl) { +; NOVLOPT-LABEL: vmfgt_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfgt.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v10, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfgt_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfgt.vf v10, v8, fa0 +; VLOPT-NEXT: vmand.mm v0, v10, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfgt.nxv4f32.f32( %a, float %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfgt_vv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmfgt_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmflt.vv v12, v10, v8 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v12, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfgt_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmflt.vv v12, v10, v8 +; VLOPT-NEXT: vmand.mm v0, v12, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfgt.nxv4f32.nxv4f32( %a, %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +}