From 2456349b36cfcaa760aa70e1a35efb3332a524b9 Mon Sep 17 00:00:00 2001
From: Michael Maitland
Date: Mon, 14 Oct 2024 06:44:04 -0700
Subject: [PATCH] [RISCV][VLOPT] Add support for 11.11 div instructions

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp   |   9 +-
 llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 160 +++++++++++++++++++
 2 files changed, 168 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 90af9ef898d95..ff7bfe4702d1c 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -546,7 +546,14 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   case RISCV::VMULHSU_VV:
   case RISCV::VMULHSU_VX:
   // 11.11. Vector Integer Divide Instructions
-  // FIXME: Add support for 11.11 instructions
+  case RISCV::VDIVU_VV:
+  case RISCV::VDIVU_VX:
+  case RISCV::VDIV_VV:
+  case RISCV::VDIV_VX:
+  case RISCV::VREMU_VV:
+  case RISCV::VREMU_VX:
+  case RISCV::VREM_VV:
+  case RISCV::VREM_VX:
   // 11.12. Vector Widening Integer Multiply Instructions
   // FIXME: Add support for 11.12 instructions
   // 11.13. Vector Single-Width Integer Multiply-Add Instructions
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 107252338829b..ca98cd96733b2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -804,6 +804,166 @@ define <vscale x 4 x i32> @vmulhsu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl)
   ret <vscale x 4 x i32> %2
 }
 
+define <vscale x 4 x i32> @vdivu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vdivu_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vdivu.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vdivu_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vdivu.vv v8, v8, v10
+; VLOPT-NEXT:    vmul.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vdivu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vdivu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vdivu_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vdivu.vx v10, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vdivu_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vdivu.vx v10, v8, a0
+; VLOPT-NEXT:    vmul.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vdivu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vdiv_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vdiv_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vdiv.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vdiv_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vdiv.vv v8, v8, v10
+; VLOPT-NEXT:    vmul.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vdiv.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vdiv_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vdiv_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vdiv.vx v10, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vdiv_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vdiv.vx v10, v8, a0
+; VLOPT-NEXT:    vmul.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vdiv.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vremu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vremu_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vremu.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vremu_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vremu.vv v8, v8, v10
+; VLOPT-NEXT:    vmul.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vremu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vremu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vremu_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vremu.vx v10, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vremu_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vremu.vx v10, v8, a0
+; VLOPT-NEXT:    vmul.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vremu.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vrem_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vrem_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vrem.vv v8, v8, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vrem_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vrem.vv v8, v8, v10
+; VLOPT-NEXT:    vmul.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vrem.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vrem_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vrem_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vrem.vx v10, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vmul.vv v8, v10, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vrem_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vrem.vx v10, v8, a0
+; VLOPT-NEXT:    vmul.vv v8, v10, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vrem.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
 define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i16> %a, i16 %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwmacc_vx:
 ; NOVLOPT:       # %bb.0: