From d88114426572d9a5f9efffb87a384ca34229b47a Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 9 Jan 2025 07:56:56 -0800 Subject: [PATCH 1/5] [RISCV][VLOPT] Add vector single width floating point add subtract instructions to isSupportedInstr --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 6 ++ .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 26 ++--- llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 100 ++++++++++++++++++ 3 files changed, 116 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 9a0938bc38dd4..1acc88a92eba8 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -979,6 +979,12 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VMSOF_M: case RISCV::VIOTA_M: case RISCV::VID_V: + // Vector Single-Width Floating-Point Add/Subtract Instructions + case RISCV::VFADD_VF: + case RISCV::VFADD_VV: + case RISCV::VFSUB_VF: + case RISCV::VFSUB_VV: + case RISCV::VFRSUB_VF: // Single-Width Floating-Point/Integer Type-Convert Instructions case RISCV::VFCVT_XU_F_V: case RISCV::VFCVT_X_F_V: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index 66952cac8e00d..ce23dd0eac203 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -93,12 +93,11 @@ define void @fadd_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -229,12 +228,11 @@ define void @fsub_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -2330,13 +2328,12 @@ define void @fadd_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v8, v10, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -2472,13 +2469,12 @@ define void @fadd_fv_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -2614,13 +2610,12 @@ define void @fsub_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v8, v10, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -2756,13 +2751,12 @@ define void @fsub_fv_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -5004,13 +4998,13 @@ define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -5181,13 +5175,13 @@ define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index 55a50a15c788c..a53b837db5461 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -2925,3 +2925,103 @@ define @vid.v( %c, iXLen %vl) { %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %c, iXLen %vl) ret %2 } + +define @vfadd_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfadd_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 +; NOVLOPT-NEXT: 
vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfadd_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v8, v10 +; VLOPT-NEXT: vfadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %b, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfadd_vx( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfadd_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfadd_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfadd.vf v10, v8, fa0 +; VLOPT-NEXT: vfadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfsub_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfsub_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfsub.vv v8, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfsub_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfsub.vv v8, v8, v10 +; VLOPT-NEXT: vfadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfsub.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %b, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfsub_vx( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfsub_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfsub.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfsub_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfsub.vf v10, v8, fa0 +; VLOPT-NEXT: vfadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfsub.nxv4f32.nxv4f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfrsub_vx( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfrsub_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfrsub.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfrsub_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfrsub.vf v10, v8, fa0 +; VLOPT-NEXT: vfadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfrsub.nxv4f32.nxv4f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) + ret %2 +} From 007b97ea3675531f0c66b5beee9fed5f359c48af Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 9 Jan 2025 08:22:16 -0800 Subject: [PATCH 2/5] [RISCV][VLOPT] Add vector widening floating point add subtract instructions to isSupportedInstr --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 9 + llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 168 +++++++++++++++++++ 2 files changed, 177 insertions(+) diff --git 
a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 1acc88a92eba8..faa9d849c226f 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -985,6 +985,15 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VFSUB_VF: case RISCV::VFSUB_VV: case RISCV::VFRSUB_VF: + // Vector Widening Floating-Point Add/Subtract Instructions + case RISCV::VFWADD_VV: + case RISCV::VFWADD_VF: + case RISCV::VFWSUB_VV: + case RISCV::VFWSUB_VF: + case RISCV::VFWADD_WF: + case RISCV::VFWADD_WV: + case RISCV::VFWSUB_WF: + case RISCV::VFWSUB_WV: // Single-Width Floating-Point/Integer Type-Convert Instructions case RISCV::VFCVT_XU_F_V: case RISCV::VFCVT_X_F_V: diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index a53b837db5461..804a8a614a820 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -3025,3 +3025,171 @@ define @vfrsub_vx( %a, float %b, iXLen %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) ret %2 } + +define @vfwadd_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwadd_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwadd.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwadd_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwadd.vv v12, v8, v10 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwadd.nxv4f64.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwadd_vf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwadd_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwadd.vf v12, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwadd_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwadd.vf v12, v8, fa0 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwsub_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwsub_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwsub.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwsub_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwsub.vv v12, v8, v10 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwsub.nxv4f64.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwsub_vx( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwsub_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwsub.vf v12, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, 
a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwsub_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwsub.vf v12, v8, fa0 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwadd_wv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwadd_wv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwadd.wv v8, v8, v12 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwadd_wv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwadd.wv v8, v8, v12 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v8, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwadd_wf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwadd_wf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwadd.wf v8, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwadd_wf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwadd.wf v8, v8, fa0 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v8, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32.f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwsub_wv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwsub_wv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwsub.wv v8, v8, v12 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwsub_wv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwsub.wv v8, v8, v12 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v8, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwsub_wf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwsub_wf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwsub.wf v8, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwsub_wf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwsub.wf v8, v8, fa0 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v8, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32.f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} From ff6a1e96cca6200113de4d93d5c161a971de71cb Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 9 Jan 2025 08:28:59 -0800 Subject: [PATCH 
3/5] [RISCV][VLOPT] Add floating point multiply divide instructions to getSupportedInstr --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 6 ++ .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 24 ++--- llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 100 ++++++++++++++++++ 3 files changed, 114 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index faa9d849c226f..74eb870045a7f 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -994,6 +994,12 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VFWADD_WV: case RISCV::VFWSUB_WF: case RISCV::VFWSUB_WV: + // Vector Single-Width Floating-Point Multiply/Divide Instructions + case RISCV::VFMUL_VF: + case RISCV::VFMUL_VV: + case RISCV::VFDIV_VF: + case RISCV::VFDIV_VV: + case RISCV::VFRDIV_VF: // Single-Width Floating-Point/Integer Type-Convert Instructions case RISCV::VFCVT_XU_F_V: case RISCV::VFCVT_X_F_V: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index ce23dd0eac203..b8710a518287a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -363,12 +363,11 @@ define void @fmul_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -499,12 +498,11 @@ define void @fdiv_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -2892,13 +2890,12 @@ define void @fmul_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v10, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -3034,13 +3031,12 @@ define void @fmul_fv_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, 
ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -3176,13 +3172,12 @@ define void @fdiv_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v8, v10, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -3318,13 +3313,12 @@ define void @fdiv_fv_v6f16(ptr %x, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v9, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret @@ -4993,12 +4987,11 @@ define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) ; ZVFHMIN-NEXT: vle16.v v10, (a2) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 @@ -5170,12 +5163,11 @@ define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vle16.v v8, (a1) ; ZVFHMIN-NEXT: vle16.v v9, (a0) ; ZVFHMIN-NEXT: vle16.v v10, (a2) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index 804a8a614a820..bdacd17fe75c0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -3193,3 +3193,103 @@ define @vfwsub_wf( %a, float %b, iXLe %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) ret %2 } + +define @vfmul_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfmul_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfmul.vv v8, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfmul_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfmul.vv v8, v8, v10 +; VLOPT-NEXT: vfadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfmul.nxv4f32.nxv4f32( 
poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %b, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfmul_vf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfmul_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfmul.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfmul_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfmul.vf v10, v8, fa0 +; VLOPT-NEXT: vfadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfmul.nxv4f32.nxv4f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfdiv_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfdiv_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfdiv.vv v8, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfdiv_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfdiv.vv v8, v8, v10 +; VLOPT-NEXT: vfadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfdiv.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %b, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfdiv_vf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfdiv_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfdiv.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfdiv_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfdiv.vf v10, v8, fa0 +; VLOPT-NEXT: vfadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfdiv.nxv4f32.nxv4f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfrdiv_vf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfrdiv_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfrdiv.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfrdiv_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfrdiv.vf v10, v8, fa0 +; VLOPT-NEXT: vfadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfrdiv.nxv4f32.nxv4f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) + ret %2 +} From c95db94136f9b288d5834f0e279376a83587ea26 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 9 Jan 2025 09:55:05 -0800 Subject: [PATCH 4/5] [RISCV][VLOPT] Add widening floating point multiply to isSupportedInstr --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 3 ++ llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 42 ++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 74eb870045a7f..8ac39b744f418 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -1000,6 +1000,9 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VFDIV_VF: case RISCV::VFDIV_VV: case 
RISCV::VFRDIV_VF: + // Vector Widening Floating-Point Multiply + case RISCV::VFWMUL_VF: + case RISCV::VFWMUL_VV: // Single-Width Floating-Point/Integer Type-Convert Instructions case RISCV::VFCVT_XU_F_V: case RISCV::VFCVT_X_F_V: diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index bdacd17fe75c0..46cca43a1be89 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -3293,3 +3293,45 @@ define @vfrdiv_vf( %a, float %b, iXLen %2 = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( poison, %1, %a, iXLen 7, iXLen %vl) ret %2 } + +define @vfwmul_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwmul_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwmul.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwmul_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwmul.vv v12, v8, v10 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwmul.nxv4f64.nxv4f32.nxv4f32( poison, %a, %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} + +define @vfwmul_vf( %a, float %b, iXLen %vl) { +; NOVLOPT-LABEL: vfwmul_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwmul.vf v12, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; NOVLOPT-NEXT: vfadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfwmul_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vfwmul.vf v12, v8, fa0 +; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VLOPT-NEXT: vfadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32( poison, %a, float %b, iXLen 7, iXLen -1) + %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) + ret %2 +} From a5ed9538dd61caf98190bb6d639eb828025180aa Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 9 Jan 2025 10:01:15 -0800 Subject: [PATCH 5/5] [RISCV][VLOPT] Add Vector Floating-Point Compare Instructions to getSupportedInstr --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 11 + llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 200 +++++++++++++++++++ 2 files changed, 211 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 8ac39b744f418..2ebf4c6d7f04e 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -1003,6 +1003,17 @@ static bool isSupportedInstr(const MachineInstr &MI) { // Vector Widening Floating-Point Multiply case RISCV::VFWMUL_VF: case RISCV::VFWMUL_VV: + // Vector Floating-Point Compare Instructions + case RISCV::VMFEQ_VF: + case RISCV::VMFEQ_VV: + case RISCV::VMFNE_VF: + case RISCV::VMFNE_VV: + case RISCV::VMFLT_VF: + case RISCV::VMFLT_VV: + case RISCV::VMFLE_VF: + case RISCV::VMFLE_VV: + case RISCV::VMFGT_VF: + case RISCV::VMFGE_VF: // Single-Width Floating-Point/Integer Type-Convert Instructions case RISCV::VFCVT_XU_F_V: case RISCV::VFCVT_X_F_V: diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index 46cca43a1be89..46fbba35c35a2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -3335,3 
+3335,203 @@ define @vfwmul_vf( %a, float %b, iXLen %2 = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( poison, %1, %1, iXLen 7, iXLen %vl) ret %2 } + +define @vmfeq_vf( %a, %b, float%c, iXLen %vl) { +; NOVLOPT-LABEL: vmfeq_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfeq.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v10, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfeq_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfeq.vf v10, v8, fa0 +; VLOPT-NEXT: vmand.mm v0, v10, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfeq.nxv4f32.f32( %a, float %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfeq_vv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmfeq_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfeq.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v12, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfeq_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfeq.vv v12, v8, v10 +; VLOPT-NEXT: vmand.mm v0, v12, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfeq.nxv4f32.nxv4f32( %a, %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfne_vf( %a, %b, float%c, iXLen %vl) { +; NOVLOPT-LABEL: vmfne_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfne.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v10, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfne_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfne.vf v10, v8, fa0 +; VLOPT-NEXT: vmand.mm v0, v10, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfne.nxv4f32.f32( %a, float %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfne_vv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmfne_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfne.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v12, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfne_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfne.vv v12, v8, v10 +; VLOPT-NEXT: vmand.mm v0, v12, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfne.nxv4f32.nxv4f32( %a, %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmflt_vf( %a, %b, float%c, iXLen %vl) { +; NOVLOPT-LABEL: vmflt_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmflt.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v10, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmflt_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmflt.vf v10, v8, fa0 +; VLOPT-NEXT: vmand.mm v0, v10, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmflt.nxv4f32.f32( %a, float %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmflt_vv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmflt_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmflt.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v12, v0 +; NOVLOPT-NEXT: ret +; +; 
VLOPT-LABEL: vmflt_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmflt.vv v12, v8, v10 +; VLOPT-NEXT: vmand.mm v0, v12, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmflt.nxv4f32.nxv4f32( %a, %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfle_vf( %a, %b, float%c, iXLen %vl) { +; NOVLOPT-LABEL: vmfle_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfle.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v10, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfle_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfle.vf v10, v8, fa0 +; VLOPT-NEXT: vmand.mm v0, v10, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfle.nxv4f32.f32( %a, float %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfle_vv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmfle_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfle.vv v12, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v12, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfle_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfle.vv v12, v8, v10 +; VLOPT-NEXT: vmand.mm v0, v12, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfle.nxv4f32.nxv4f32( %a, %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfgt_vf( %a, %b, float%c, iXLen %vl) { +; NOVLOPT-LABEL: vmfgt_vf: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmfgt.vf v10, v8, fa0 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v10, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfgt_vf: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmfgt.vf v10, v8, fa0 +; VLOPT-NEXT: vmand.mm v0, v10, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfgt.nxv4f32.f32( %a, float %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +} + +define @vmfgt_vv( %a, %b, %c, iXLen %vl) { +; NOVLOPT-LABEL: vmfgt_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vmflt.vv v12, v10, v8 +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLOPT-NEXT: vmand.mm v0, v12, v0 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vmfgt_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vmflt.vv v12, v10, v8 +; VLOPT-NEXT: vmand.mm v0, v12, v0 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vmfgt.nxv4f32.nxv4f32( %a, %c, iXLen -1) + %2 = call @llvm.riscv.vmand.nxv4i1( %1, %b, iXLen %vl) + ret %2 +}
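
A note on how the new vl-opt-instrs.ll cases above are exercised: each test emits the candidate instruction with VL set to -1 (VLMAX) and feeds its result into a single consumer that runs with VL %vl, so the VL optimizer is allowed to shrink the producer's VL to match the consumer's demand. Below is a minimal sketch of one such case with the <vscale x 4 x float> operand types spelled out in full; the RUN lines, the -riscv-enable-vl-optimizer flag, and the @vfadd_vv_sketch name are assumptions about the test harness, not part of the diff above.

; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
; RUN:   -riscv-enable-vl-optimizer=false | FileCheck %s --check-prefixes=CHECK,NOVLOPT
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
; RUN:   -riscv-enable-vl-optimizer | FileCheck %s --check-prefixes=CHECK,VLOPT

define <vscale x 4 x float> @vfadd_vv_sketch(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
  ; Producer: emitted with VLMAX (iXLen -1); becomes eligible for VL reduction
  ; once VFADD_VV is listed in isSupportedInstr.
  %1 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1)
  ; Consumer: the only user of %1 and it runs with VL %vl, so only %vl elements
  ; of %1 are demanded.
  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %b, iXLen 7, iXLen %vl)
  ret <vscale x 4 x float> %2
}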