diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index ee494c4681511..2869891014978 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -404,7 +404,19 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VWMULSU_VV:
   case RISCV::VWMULSU_VX:
   case RISCV::VWMULU_VV:
-  case RISCV::VWMULU_VX: {
+  case RISCV::VWMULU_VX:
+  // Vector Widening Integer Multiply-Add Instructions
+  // Destination EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
+  // A SEW-bit*SEW-bit multiply of the sources forms a 2*SEW-bit value, which
+  // is then added to the 2*SEW-bit Dest. These instructions never have a
+  // passthru operand.
+  case RISCV::VWMACCU_VV:
+  case RISCV::VWMACCU_VX:
+  case RISCV::VWMACC_VV:
+  case RISCV::VWMACC_VX:
+  case RISCV::VWMACCSU_VV:
+  case RISCV::VWMACCSU_VX:
+  case RISCV::VWMACCUS_VX: {
     unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
     RISCVII::VLMUL EMUL =
         IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
@@ -419,18 +431,7 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
   case RISCV::VWADD_WV:
   case RISCV::VWADD_WX:
   case RISCV::VWSUB_WV:
-  case RISCV::VWSUB_WX:
-  // Vector Widening Integer Multiply-Add Instructions
-  // Destination EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
-  // Even though the add is a 2*SEW addition, the operands of the add are the
-  // Dest which is 2*SEW and the result of the multiply which is 2*SEW.
-  case RISCV::VWMACCU_VV:
-  case RISCV::VWMACCU_VX:
-  case RISCV::VWMACC_VV:
-  case RISCV::VWMACC_VX:
-  case RISCV::VWMACCSU_VV:
-  case RISCV::VWMACCSU_VX:
-  case RISCV::VWMACCUS_VX: {
+  case RISCV::VWSUB_WX: {
     bool IsOp1 = HasPassthru ? MO.getOperandNo() == 2 : MO.getOperandNo() == 1;
     bool TwoTimes = IsMODef || IsOp1;
     unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
@@ -572,9 +573,13 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   // Vector Single-Width Integer Multiply-Add Instructions
   // FIXME: Add support
   // Vector Widening Integer Multiply-Add Instructions
-  // FIXME: Add support
-  case RISCV::VWMACC_VX:
+  case RISCV::VWMACCU_VV:
   case RISCV::VWMACCU_VX:
+  case RISCV::VWMACC_VV:
+  case RISCV::VWMACC_VX:
+  case RISCV::VWMACCSU_VV:
+  case RISCV::VWMACCSU_VX:
+  case RISCV::VWMACCUS_VX:
   // Vector Integer Merge Instructions
   // FIXME: Add support
   // Vector Integer Move Instructions
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 11f603b56b6e5..39cc90b812f99 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -1248,44 +1248,149 @@ define <vscale x 4 x i64> @vwmulu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i16> %a, i16 %b, iXLen %vl) {
+define <vscale x 4 x i32> @vwmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i32> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vwmacc_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT:    vwmacc.vv v8, v10, v11
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwmacc_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; VLOPT-NEXT:    vwmacc.vv v8, v10, v11
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v8, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %d, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
 ; NOVLOPT-LABEL: vwmacc_vx:
 ; NOVLOPT:       # %bb.0:
-; NOVLOPT-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vwmacc.vx v10, a0, v8
+; NOVLOPT-NEXT:    vsetvli a2, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT:    vwmacc.vx v8, a0, v10
 ; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vadd.vv v8, v10, v10
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vwmacc_vx:
 ; VLOPT:       # %bb.0:
-; VLOPT-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; VLOPT-NEXT:    vwmacc.vx v10, a0, v8
+; VLOPT-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
+; VLOPT-NEXT:    vwmacc.vx v8, a0, v10
 ; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT:    vadd.vv v8, v10, v10
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
 ; VLOPT-NEXT:    ret
-  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16(<vscale x 4 x i32> poison, i16 %b, <vscale x 4 x i16> %a, iXLen -1, iXLen 0)
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
   %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
   ret <vscale x 4 x i32> %2
 }
 
-define <vscale x 4 x i32> @vwmaccu_vx(<vscale x 4 x i16> %a, i16 %b, iXLen %vl) {
+define <vscale x 4 x i32> @vwmaccu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i32> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccu_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT:    vwmaccu.vv v8, v10, v11
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwmaccu_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; VLOPT-NEXT:    vwmaccu.vv v8, v10, v11
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v8, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %d, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vwmaccu_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i32> %d, i32 %e, iXLen %vl) {
 ; NOVLOPT-LABEL: vwmaccu_vx:
 ; NOVLOPT:       # %bb.0:
-; NOVLOPT-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vwmaccu.vx v10, a0, v8
-; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vadd.vv v8, v10, v10
+; NOVLOPT-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT:    vwmaccu.vx v8, a0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v12
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vwmaccu_vx:
 ; VLOPT:       # %bb.0:
-; VLOPT-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; VLOPT-NEXT:    vwmaccu.vx v10, a0, v8
+; VLOPT-NEXT:    vsetvli zero, a2, e16, m1, tu, ma
+; VLOPT-NEXT:    vwmaccu.vx v8, a0, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v8, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %d, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vwmaccsu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccsu_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT:    vwmaccsu.vv v8, v10, v11
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwmaccsu_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; VLOPT-NEXT:    vwmaccsu.vv v8, v10, v11
 ; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT:    vadd.vv v8, v10, v10
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vwmaccsu_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccsu_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT:    vwmaccsu.vx v8, a0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwmaccsu_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
+; VLOPT-NEXT:    vwmaccsu.vx v8, a0, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vwmaccus_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccus_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT:    vwmaccus.vx v8, a0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vwmaccus_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
+; VLOPT-NEXT:    vwmaccus.vx v8, a0, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
 ; VLOPT-NEXT:    ret
-  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16(<vscale x 4 x i32> poison, i16 %b, <vscale x 4 x i16> %a, iXLen -1, iXLen 0)
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccus.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
   %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
   ret <vscale x 4 x i32> %2
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
index 1a1472fcfc66f..0410ca34f7ba1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
@@ -136,3 +136,17 @@ define <vscale x 4 x i32> @different_imm_vl_with_tu(<vscale x 4 x i32> %passthru
   %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a,iXLen 4)
   ret <vscale x 4 x i32> %w
 }
+
+define <vscale x 4 x i32> @dont_optimize_tied_def(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
+; CHECK-LABEL: dont_optimize_tied_def:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT:    vwmacc.vv v8, v10, v11
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT:    vwmacc.vv v8, v10, v11
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %1, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl, iXLen 0)
+  ret <vscale x 4 x i32> %2
+}
+
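
Note (not part of the patch itself): the rule the getOperandInfo() change encodes for the widening multiply-add opcodes is that the tied destination operand is read and written at EEW = 2*SEW and EMUL = 2*LMUL, while the multiplier sources stay at EEW = SEW and EMUL = LMUL. The self-contained C++ sketch below only illustrates that arithmetic for the SEW=16, LMUL=1 configuration used by the tests; the names OperandEEW and wideningMulAddOperandInfo are made up for the example and do not exist in RISCVVLOptimizer.cpp.

// Illustrative sketch only; not LLVM code. It models the EEW/EMUL rule the
// patch applies to vwmacc/vwmaccu/vwmaccsu/vwmaccus operands, using the same
// log2 convention as MILog2SEW in the pass.
#include <cassert>
#include <cstdio>

struct OperandEEW {
  unsigned Log2EEW; // log2 of the operand's effective element width
  unsigned EMULNum; // EMUL expressed as the fraction EMULNum/EMULDen
  unsigned EMULDen;
};

static OperandEEW wideningMulAddOperandInfo(unsigned Log2SEW, unsigned LMULNum,
                                            unsigned LMULDen, bool IsTiedDest) {
  if (IsTiedDest) // tied destination: EEW = 2*SEW, EMUL = 2*LMUL
    return {Log2SEW + 1, 2 * LMULNum, LMULDen};
  return {Log2SEW, LMULNum, LMULDen}; // multiplier sources: unchanged
}

int main() {
  // vwmacc.vv at SEW=16 (log2 = 4), LMUL=1: dest is e32/m2, sources are e16/m1.
  OperandEEW Dest = wideningMulAddOperandInfo(4, 1, 1, /*IsTiedDest=*/true);
  OperandEEW Src = wideningMulAddOperandInfo(4, 1, 1, /*IsTiedDest=*/false);
  assert(Dest.Log2EEW == 5 && Dest.EMULNum == 2 && Dest.EMULDen == 1);
  assert(Src.Log2EEW == 4 && Src.EMULNum == 1 && Src.EMULDen == 1);
  std::printf("dest: e%u m%u/%u, src: e%u m%u/%u\n", 1u << Dest.Log2EEW,
              Dest.EMULNum, Dest.EMULDen, 1u << Src.Log2EEW, Src.EMULNum,
              Src.EMULDen);
  return 0;
}

The dont_optimize_tied_def test exercises the same operand model from the other direction: the first vwmacc.vv keeps its VLMAX vsetvli because its result feeds the tied destination of the second vwmacc.vv, whose tail-undisturbed policy reads the incoming register beyond the smaller VL, so the producer's VL cannot safely be reduced.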