diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index d4829bced2470..6c19a8fd32d42 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -545,6 +545,8 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
   case RISCV::VFWMSAC_VV:
   case RISCV::VFWNMSAC_VF:
   case RISCV::VFWNMSAC_VV:
+  case RISCV::VFWMACCBF16_VV:
+  case RISCV::VFWMACCBF16_VF:
   // Vector Widening Floating-Point Add/Subtract Instructions
   // Dest EEW=2*SEW. Source EEW=SEW.
   case RISCV::VFWADD_VV:
@@ -1050,6 +1052,17 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   case RISCV::VFMSUB_VF:
   case RISCV::VFNMSUB_VV:
   case RISCV::VFNMSUB_VF:
+  // Vector Widening Floating-Point Fused Multiply-Add Instructions
+  case RISCV::VFWMACC_VV:
+  case RISCV::VFWMACC_VF:
+  case RISCV::VFWNMACC_VV:
+  case RISCV::VFWNMACC_VF:
+  case RISCV::VFWMSAC_VV:
+  case RISCV::VFWMSAC_VF:
+  case RISCV::VFWNMSAC_VV:
+  case RISCV::VFWNMSAC_VF:
+  case RISCV::VFWMACCBF16_VV:
+  case RISCV::VFWMACCBF16_VF:
   // Vector Floating-Point MIN/MAX Instructions
   case RISCV::VFMIN_VF:
   case RISCV::VFMIN_VV:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 053f1209cf214..f4591a191c8b7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
 
 ; The purpose of this file is to check the behavior of specific instructions as it relates to the VL optimizer
 
@@ -4351,3 +4351,213 @@ define <vscale x 4 x float> @vfnmsub_vf(<vscale x 4 x float> %a, float %b, <vsc
   %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
   ret <vscale x 4 x float> %2
 }
+
+define <vscale x 4 x double> @vfwmacc_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwmacc_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfwmacc.vv v8, v12, v14
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v16
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwmacc_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfwmacc.vv v8, v12, v14
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v16
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwmacc_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwmacc_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfwmacc.vf v8, fa0, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v16
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwmacc_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfwmacc.vf v8, fa0, v12
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v16
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwnmacc_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwnmacc_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfwnmacc.vv v8, v12, v14
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v16
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwnmacc_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfwnmacc.vv v8, v12, v14
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v16
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x double> @llvm.riscv.vfwnmacc(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwnmacc_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwnmacc_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfwnmacc.vf v8, fa0, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v16
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwnmacc_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfwnmacc.vf v8, fa0, v12
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v16
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x double> @llvm.riscv.vfwnmacc(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwmsac_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwmsac_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfwmsac.vv v8, v12, v14
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v16
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwmsac_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfwmsac.vv v8, v12, v14
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v16
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x double> @llvm.riscv.vfwmsac(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwmsac_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwmsac_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfwmsac.vf v8, fa0, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v16
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwmsac_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfwmsac.vf v8, fa0, v12
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v16
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x double> @llvm.riscv.vfwmsac(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwnmsac_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwnmsac_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfwnmsac.vv v8, v12, v14
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v16
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwnmsac_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfwnmsac.vv v8, v12, v14
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v16
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x double> @llvm.riscv.vfwnmsac(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwnmsac_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwnmsac_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfwnmsac.vf v8, fa0, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v16
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwnmsac_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfwnmsac.vf v8, fa0, v12
+; VLOPT-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v16
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x double> @llvm.riscv.vfwnmsac(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x float> @vfwmaccbf16_vv(<vscale x 4 x float> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, <vscale x 4 x float> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwmaccbf16_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT:    vfwmaccbf16.vv v8, v10, v11
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwmaccbf16_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; VLOPT-NEXT:    vfwmaccbf16.vv v8, v10, v11
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfwmaccbf16(<vscale x 4 x float> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, iXLen 7, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %d, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfwmaccbf16_vf(<vscale x 4 x float> %a, bfloat %b, <vscale x 4 x bfloat> %c, <vscale x 4 x float> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwmaccbf16_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT:    vfwmaccbf16.vf v8, fa0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfwmaccbf16_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; VLOPT-NEXT:    vfwmaccbf16.vf v8, fa0, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfwmaccbf16(<vscale x 4 x float> %a, bfloat %b, <vscale x 4 x bfloat> %c, iXLen 7, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %d, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}