diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index d4829bced2470..2d0e2a423bc48 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -976,6 +976,17 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   case RISCV::VMV_V_I:
   case RISCV::VMV_V_X:
   case RISCV::VMV_V_V:
+  // Vector Single-Width Saturating Add and Subtract
+  case RISCV::VSADDU_VV:
+  case RISCV::VSADDU_VX:
+  case RISCV::VSADDU_VI:
+  case RISCV::VSADD_VV:
+  case RISCV::VSADD_VX:
+  case RISCV::VSADD_VI:
+  case RISCV::VSSUBU_VV:
+  case RISCV::VSSUBU_VX:
+  case RISCV::VSSUB_VV:
+  case RISCV::VSSUB_VX:
   // Vector Single-Width Averaging Add and Subtract
   case RISCV::VAADDU_VV:
   case RISCV::VAADDU_VX:
@@ -985,6 +996,23 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   case RISCV::VASUBU_VX:
   case RISCV::VASUB_VV:
   case RISCV::VASUB_VX:
+  // Vector Single-Width Fractional Multiply with Rounding and Saturation
+  case RISCV::VSMUL_VV:
+  case RISCV::VSMUL_VX:
+  // Vector Single-Width Scaling Shift Instructions
+  case RISCV::VSSRL_VV:
+  case RISCV::VSSRL_VX:
+  case RISCV::VSSRL_VI:
+  case RISCV::VSSRA_VV:
+  case RISCV::VSSRA_VX:
+  case RISCV::VSSRA_VI:
+  // Vector Narrowing Fixed-Point Clip Instructions
+  case RISCV::VNCLIPU_WV:
+  case RISCV::VNCLIPU_WX:
+  case RISCV::VNCLIPU_WI:
+  case RISCV::VNCLIP_WV:
+  case RISCV::VNCLIP_WX:
+  case RISCV::VNCLIP_WI:
 
   // Vector Crypto
   case RISCV::VWSLL_VI:
@@ -1173,8 +1201,16 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
   const MCInstrDesc &Desc = MI.getDesc();
   if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags))
     return false;
-  if (MI.getNumDefs() != 1)
+
+  if (MI.getNumExplicitDefs() != 1)
+    return false;
+
+  // Some instructions have implicit defs e.g. $vxsat. If they might be read
+  // later then we can't reduce VL.
+ if (!MI.allImplicitDefsAreDead()) { + LLVM_DEBUG(dbgs() << "Not a candidate because has non-dead implicit def\n"); return false; + } if (MI.mayRaiseFPException()) { LLVM_DEBUG(dbgs() << "Not a candidate because may raise FP exception\n"); diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll index d329979857a6b..403cc0eb9dce1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -894,9 +894,10 @@ define void @test_dag_loop() { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vmclr.m v0 +; CHECK-NEXT: vsetivli zero, 0, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: vsetivli zero, 0, e8, m4, tu, mu +; CHECK-NEXT: vsetvli zero, zero, e8, m4, tu, mu ; CHECK-NEXT: vssubu.vx v12, v8, zero, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; CHECK-NEXT: vmseq.vv v0, v12, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index 053f1209cf214..2411e5359b403 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -2511,6 +2511,514 @@ define @vwmaccus_vx( %a, i16 %b, %2 } +define @vsaddu_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vsaddu_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vsaddu.vv v10, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vsaddu_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vsaddu.vv v10, v8, v10 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vsaddu( poison, %a, %b, iXLen -1) + %2 = call @llvm.riscv.vadd( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vsaddu_vx( %a, i32 %b, iXLen %vl) { +; NOVLOPT-LABEL: vsaddu_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vsaddu.vx v10, v8, a0 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vsaddu_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VLOPT-NEXT: vsaddu.vx v10, v8, a0 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vsaddu( poison, %a, i32 %b, iXLen -1) + %2 = call @llvm.riscv.vadd( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vsaddu_vi( %a, iXLen %vl) { +; NOVLOPT-LABEL: vsaddu_vi: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vsaddu.vi v10, v8, 5 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vsaddu_vi: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vsaddu.vi v10, v8, 5 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vsaddu( poison, %a, i32 5, iXLen -1) + %2 = call @llvm.riscv.vadd( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vsadd_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vsadd_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vsadd.vv v10, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vsadd_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; 
VLOPT-NEXT: vsadd.vv v10, v8, v10 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vsadd( poison, %a, %b, iXLen -1) + %2 = call @llvm.riscv.vadd( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vsadd_vx( %a, i32 %b, iXLen %vl) { +; NOVLOPT-LABEL: vsadd_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vsadd.vx v10, v8, a0 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vsadd_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VLOPT-NEXT: vsadd.vx v10, v8, a0 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vsadd( poison, %a, i32 %b, iXLen -1) + %2 = call @llvm.riscv.vadd( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vsadd_vi( %a, iXLen %vl) { +; NOVLOPT-LABEL: vsadd_vi: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vsadd.vi v10, v8, 5 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vsadd_vi: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vsadd.vi v10, v8, 5 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vsadd( poison, %a, i32 5, iXLen -1) + %2 = call @llvm.riscv.vadd( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vssubu_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vssubu_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vssubu.vv v10, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vssubu_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vssubu.vv v10, v8, v10 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vssubu( poison, %a, %b, iXLen -1) + %2 = call @llvm.riscv.vadd( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vssubu_vx( %a, i32 %b, iXLen %vl) { +; NOVLOPT-LABEL: vssubu_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vssubu.vx v10, v8, a0 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vssubu_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VLOPT-NEXT: vssubu.vx v10, v8, a0 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vssubu( poison, %a, i32 %b, iXLen -1) + %2 = call @llvm.riscv.vadd( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vssub_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vssub_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vssub.vv v10, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vssub_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vssub.vv v10, v8, v10 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vssub( poison, %a, %b, iXLen -1) + %2 = call @llvm.riscv.vadd( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vssub_vx( %a, i32 %b, iXLen %vl) { +; NOVLOPT-LABEL: vssub_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vssub.vx v10, v8, a0 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; 
VLOPT-LABEL: vssub_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VLOPT-NEXT: vssub.vx v10, v8, a0 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vssub( poison, %a, i32 %b, iXLen -1) + %2 = call @llvm.riscv.vadd( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vsmul_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vsmul_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vsmul.vv v8, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vsmul_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vsmul.vv v8, v8, v10 +; VLOPT-NEXT: vadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vsmul.nxv4i32.nxv4i32( poison, %a, %b, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vsmul_vx( %a, i32 %b, iXLen %vl) { +; NOVLOPT-LABEL: vsmul_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vsmul.vx v10, v8, a0 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vsmul_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VLOPT-NEXT: vsmul.vx v10, v8, a0 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vsmul.nxv4i32.nxv4i32( poison, %a, i32 %b, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vssrl_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vssrl_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vssrl.vv v8, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vssrl_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vssrl.vv v8, v8, v10 +; VLOPT-NEXT: vadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vssrl.nxv4i32.nxv4i32( poison, %a, %b, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vssrl_vx( %a, iXLen %b, iXLen %vl) { +; NOVLOPT-LABEL: vssrl_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vssrl.vx v10, v8, a0 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vssrl_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VLOPT-NEXT: vssrl.vx v10, v8, a0 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vssrl.nxv4i32.nxv4i32( poison, %a, iXLen %b, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vssrl_vi( %a, iXLen %vl) { +; NOVLOPT-LABEL: vssrl_vi: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vssrl.vi v10, v8, 5 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vssrl_vi: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: 
vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vssrl.vi v10, v8, 5 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vssrl.nxv4i32.nxv4i32( poison, %a, iXLen 5, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vssra_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vssra_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vssra.vv v8, v8, v10 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v8, v10 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vssra_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vssra.vv v8, v8, v10 +; VLOPT-NEXT: vadd.vv v8, v8, v10 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vssra.nxv4i32.nxv4i32( poison, %a, %b, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %b, iXLen %vl) + ret %2 +} + +define @vssra_vx( %a, iXLen %b, iXLen %vl) { +; NOVLOPT-LABEL: vssra_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vssra.vx v10, v8, a0 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vssra_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VLOPT-NEXT: vssra.vx v10, v8, a0 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vssra.nxv4i32.nxv4i32( poison, %a, iXLen %b, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vssra_vi( %a, iXLen %vl) { +; NOVLOPT-LABEL: vssra_vi: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vssra.vi v10, v8, 5 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v10, v8 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vssra_vi: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vssra.vi v10, v8, 5 +; VLOPT-NEXT: vadd.vv v8, v10, v8 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vssra.nxv4i32.nxv4i32( poison, %a, iXLen 5, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd.nxv4i32.nxv4i32( poison, %1, %a, iXLen %vl) + ret %2 +} + +define @vnclipu_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vnclipu_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vnclipu.wv v14, v8, v12 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v14, v14 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vnclipu_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vnclipu.wv v14, v8, v12 +; VLOPT-NEXT: vadd.vv v8, v14, v14 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vnclipu( poison, %a, %b, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd( poison, %1, %1, iXLen %vl) + ret %2 +} + +define @vnclipu_vx( %a, iXLen %b, iXLen %vl) { +; NOVLOPT-LABEL: vnclipu_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vnclipu.wx v12, v8, a0 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vnclipu_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a1, e32, 
m2, ta, ma +; VLOPT-NEXT: vnclipu.wx v12, v8, a0 +; VLOPT-NEXT: vadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vnclipu( poison, %a, iXLen %b, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd( poison, %1, %1, iXLen %vl) + ret %2 +} + +define @vnclipu_vi( %a, iXLen %vl) { +; NOVLOPT-LABEL: vnclipu_vi: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vnclipu.wi v12, v8, 5 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vnclipu_vi: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vnclipu.wi v12, v8, 5 +; VLOPT-NEXT: vadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vnclipu( poison, %a, iXLen 5, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd( poison, %1, %1, iXLen %vl) + ret %2 +} + +define @vnclip_vv( %a, %b, iXLen %vl) { +; NOVLOPT-LABEL: vnclip_vv: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vnclip.wv v14, v8, v12 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v14, v14 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vnclip_vv: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vnclip.wv v14, v8, v12 +; VLOPT-NEXT: vadd.vv v8, v14, v14 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vnclip( poison, %a, %b, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd( poison, %1, %1, iXLen %vl) + ret %2 +} + +define @vnclip_vx( %a, iXLen %b, iXLen %vl) { +; NOVLOPT-LABEL: vnclip_vx: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vnclip.wx v12, v8, a0 +; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vnclip_vx: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VLOPT-NEXT: vnclip.wx v12, v8, a0 +; VLOPT-NEXT: vadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vnclip( poison, %a, iXLen %b, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd( poison, %1, %1, iXLen %vl) + ret %2 +} + +define @vnclip_vi( %a, iXLen %vl) { +; NOVLOPT-LABEL: vnclip_vi: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: csrwi vxrm, 0 +; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLOPT-NEXT: vnclip.wi v12, v8, 5 +; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOVLOPT-NEXT: vadd.vv v8, v12, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vnclip_vi: +; VLOPT: # %bb.0: +; VLOPT-NEXT: csrwi vxrm, 0 +; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLOPT-NEXT: vnclip.wi v12, v8, 5 +; VLOPT-NEXT: vadd.vv v8, v12, v12 +; VLOPT-NEXT: ret + %1 = call @llvm.riscv.vnclip( poison, %a, iXLen 5, iXLen 0, iXLen -1) + %2 = call @llvm.riscv.vadd( poison, %1, %1, iXLen %vl) + ret %2 +} + define @vmv_v_i( %a, i32 %x, iXLen %vl) { ; NOVLOPT-LABEL: vmv_v_i: ; NOVLOPT: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir index 78054c73d848f..0475a988e9851 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir @@ -270,3 +270,23 @@ body: | %a:vr = PseudoVADD_VV_M1 %z, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */ %b:vr = PseudoVADD_VV_M1 $noreg, %a, $noreg, 2, 3 /* e8 */, 0 /* tu, mu */ ... 
+---
+name: vxsat_dead
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vxsat_dead
+    ; CHECK: %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vxsat
+    ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vxsat
+    %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+...
+---
+name: vxsat_not_dead
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vxsat_not_dead
+    ; CHECK: %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def $vxsat
+    ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def $vxsat
+    %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+...