From 00095e1e42a7075dfc258808962de386e4f35a5d Mon Sep 17 00:00:00 2001
From: Yeting Kuo <46629943+fakepaper56@users.noreply.github.com>
Date: Tue, 31 Jan 2023 09:52:06 +0800
Subject: [PATCH 01/13] [VP][RISCV] Add VP ISD opcodes VP_MULHU/VP_MULHS

Add ISD opcodes VP_MULHU/VP_MULHS, which can be used by VP optimizations.

---
 llvm/include/llvm/IR/VPIntrinsics.def                 | 5 +++++
 llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 8 ++++----
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp           | 5 +++++
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 55f4719da7c8b..e71ca44779adb 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -180,6 +180,11 @@ HELPER_REGISTER_BINARY_INT_VP(vp_xor, VP_XOR, Xor, XOR)

 #undef HELPER_REGISTER_BINARY_INT_VP

+BEGIN_REGISTER_VP_SDNODE(VP_MULHU, -1, vp_mulhu, 2, 3)
+END_REGISTER_VP_SDNODE(VP_MULHU)
+BEGIN_REGISTER_VP_SDNODE(VP_MULHS, -1, vp_mulhs, 2, 3)
+END_REGISTER_VP_SDNODE(VP_MULHS)
+
 // llvm.vp.smin(x,y,mask,vlen)
 BEGIN_REGISTER_VP(vp_smin, 2, 3, VP_SMIN, -1)
 VP_PROPERTY_BINARYOP

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 1000235ab4061..6e2f37d7c3dd4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1277,8 +1277,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::ADD: case ISD::VP_ADD:
   case ISD::SUB: case ISD::VP_SUB:
   case ISD::MUL: case ISD::VP_MUL:
-  case ISD::MULHS:
-  case ISD::MULHU:
+  case ISD::MULHS: case ISD::VP_MULHS:
+  case ISD::MULHU: case ISD::VP_MULHU:
   case ISD::ABDS:
   case ISD::ABDU:
   case ISD::AVGCEILS:
@@ -4552,8 +4552,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::ADD: case ISD::VP_ADD:
   case ISD::AND: case ISD::VP_AND:
   case ISD::MUL: case ISD::VP_MUL:
-  case ISD::MULHS:
-  case ISD::MULHU:
+  case ISD::MULHS: case ISD::VP_MULHS:
+  case ISD::MULHU: case ISD::VP_MULHU:
   case ISD::ABDS:
   case ISD::ABDU:
   case ISD::OR: case ISD::VP_OR:

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8e3caf51d876b..3a4f1fefa9445 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -696,6 +696,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         ISD::VP_FP_TO_UINT,  ISD::VP_SETCC,       ISD::VP_SIGN_EXTEND,
         ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE,    ISD::VP_SMIN,
         ISD::VP_SMAX,        ISD::VP_UMIN,        ISD::VP_UMAX,
+        ISD::VP_MULHU,       ISD::VP_MULHS,
         ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
         ISD::VP_SADDSAT,     ISD::VP_UADDSAT,     ISD::VP_SSUBSAT,
         ISD::VP_USUBSAT,     ISD::VP_CTTZ_ELTS,   ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
@@ -6410,6 +6411,8 @@ static unsigned getRISCVVLOp(SDValue Op) {
   VP_CASE(ADD)        // VP_ADD
   VP_CASE(SUB)        // VP_SUB
   VP_CASE(MUL)        // VP_MUL
+  VP_CASE(MULHS)      // VP_MULHS
+  VP_CASE(MULHU)      // VP_MULHU
   VP_CASE(SDIV)       // VP_SDIV
   VP_CASE(SREM)       // VP_SREM
   VP_CASE(UDIV)       // VP_UDIV
@@ -7605,6 +7608,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   case ISD::VP_ADD:
   case ISD::VP_SUB:
   case ISD::VP_MUL:
+  case ISD::VP_MULHS:
+  case ISD::VP_MULHU:
   case ISD::VP_SDIV:
   case ISD::VP_UDIV:
   case ISD::VP_SREM:
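As a quick sketch of what the new opcodes enable (this snippet is
illustrative, not part of the patch; the divide-by-constant combine that
actually produces VP_MULHU lands with the next patch in the series, and the
function name here is hypothetical):

  declare <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)

  define <vscale x 2 x i32> @udiv_by_5(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
    %vec = insertelement <vscale x 2 x i32> undef, i32 5, i32 0
    %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
    ; With VP_MULHU legal for the type, this divide can lower to a masked
    ; multiply-high by the magic constant 0xCCCCCCCD plus a logical shift
    ; right by 2 (still under %m and %evl) instead of a vdivu.
    %v = call <vscale x 2 x i32> @llvm.vp.udiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
    ret <vscale x 2 x i32> %v
  }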
From bf0b608a2a52e8b6a9edf7cc725c85de7bd47858 Mon Sep 17 00:00:00 2001
From: Jesse Huang
Date: Fri, 10 Feb 2023 11:52:47 +0800
Subject: [PATCH 02/13] [LLVM][VP] Optimize divide by constant for VP
 intrinsics

This patch implements divide-by-constant foldings for vp.udiv/vp.sdiv and
vp.urem/vp.srem, as well as some other minor foldings such as division by a
power of two, division by -1, division by INT_MIN, etc.

---
 llvm/include/llvm/CodeGen/TargetLowering.h    |    4 +
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  330 ++++
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  253 +++
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |   10 +
 llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll | 1540 +++++++++++++++++
 5 files changed, 2137 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 04ee24c0916e5..6447752c451d8 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5108,6 +5108,10 @@ class TargetLowering : public TargetLoweringBase {
   SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 SmallVectorImpl<SDNode *> &Created) const;
+  SDValue BuildVPSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+                      SmallVectorImpl<SDNode *> &Created) const;
+  SDValue BuildVPUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+                      SmallVectorImpl<SDNode *> &Created) const;

   /// Targets may override this function to provide custom SDIV lowering for
   /// power-of-2 denominators. If the target returns an empty SDValue, LLVM

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8858c2012c706..74ab35f8c5f05 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -564,6 +564,14 @@ namespace {
     SDValue visitFSUBForFMACombine(SDNode *N);
     SDValue visitFMULForFMADistributiveCombine(SDNode *N);

+    SDValue visitVPUDIV(SDNode *N);
+    SDValue visitVPUDIVLike(SDValue N0, SDValue N1, SDNode *N);
+    SDValue BuildVPUDIV(SDNode *N);
+    SDValue visitVPSDIV(SDNode *N);
+    SDValue visitVPSDIVLike(SDValue N0, SDValue N1, SDNode *N);
+    SDValue BuildVPSDIV(SDNode *N);
+    SDValue visitVPREM(SDNode *N);
+
     SDValue XformToShuffleWithZero(SDNode *N);
     bool reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                     const SDLoc &DL,
@@ -5161,6 +5169,59 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
   return SDValue();
 }

+// Handles ISD::VP_SREM and ISD::VP_UREM.
+SDValue DAGCombiner::visitVPREM(SDNode *N) {
+  unsigned Opcode = N->getOpcode();
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue Mask = N->getOperand(2);
+  SDValue VL = N->getOperand(3);
+  EVT VT = N->getValueType(0);
+  EVT CCVT =
+      EVT::getVectorVT(*DAG.getContext(), MVT::i1, VT.getVectorElementCount());
+
+  bool IsSigned = (Opcode == ISD::VP_SREM);
+  SDLoc DL(N);
+
+  // fold (vp.urem X, -1) -> select(FX == -1, 0, FX)
+  // Freeze the numerator to avoid a miscompile with an undefined value.
+  if (!IsSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false)) {
+    SDValue F0 = DAG.getFreeze(N0);
+    SDValue EqualsNeg1 = DAG.getSetCCVP(DL, CCVT, F0, N1, ISD::SETEQ, Mask, VL);
+    return DAG.getNode(ISD::VP_SELECT, DL, VT, EqualsNeg1,
+                       DAG.getConstant(0, DL, VT), F0, VL);
+  }
+
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+
+  // If X/C can be simplified by the division-by-constant logic, lower
+  // X%C to the equivalent of X-X/C*C.
+  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
+  // speculative DIV must not cause a DIVREM conversion. We guard against this
+  // by skipping the simplification if isIntDivCheap().
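+  // (Concretely, vp.urem X, splat(7) with a known-non-zero divisor becomes
+  //  vp.sub(X, vp.mul(vp.udiv(X, splat(7)), splat(7))), and the inner
+  //  vp.udiv is then itself folded by the divide-by-constant logic.)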
When div is not cheap, + // combine will not return a DIVREM. Regardless, checking cheapness here + // makes sense since the simplification results in fatter code. + if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) { + SDValue OptimizedDiv = + IsSigned ? visitVPSDIVLike(N0, N1, N) : visitVPUDIVLike(N0, N1, N); + if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) { + // If the equivalent Div node also exists, update its users. + unsigned DivOpcode = IsSigned ? ISD::VP_SDIV : ISD::VP_UDIV; + if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(), + {N0, N1, Mask, VL})) + CombineTo(DivNode, OptimizedDiv); + SDValue Mul = + DAG.getNode(ISD::VP_MUL, DL, VT, OptimizedDiv, N1, Mask, VL); + SDValue Sub = DAG.getNode(ISD::VP_SUB, DL, VT, N0, Mul, Mask, VL); + AddToWorklist(OptimizedDiv.getNode()); + AddToWorklist(Mul.getNode()); + return Sub; + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -27219,6 +27280,268 @@ SDValue DAGCombiner::visitVP_FSUB(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::BuildVPUDIV(SDNode *N) { + // when optimising for minimum size, we don't want to expand a div to a mul + // and a shift. + if (DAG.getMachineFunction().getFunction().hasMinSize()) + return SDValue(); + + SmallVector Built; + if (SDValue S = TLI.BuildVPUDIV(N, DAG, LegalOperations, Built)) { + for (SDNode *N : Built) + AddToWorklist(N); + return S; + } + + return SDValue(); +} + +/// Given an ISD::VP_SDIV node expressing a divide by constant, return +/// a DAG expression to select that will generate the same value by multiplying +/// by a magic number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". +SDValue DAGCombiner::BuildVPSDIV(SDNode *N) { + // when optimising for minimum size, we don't want to expand a div to a mul + // and a shift. + if (DAG.getMachineFunction().getFunction().hasMinSize()) + return SDValue(); + + SmallVector Built; + if (SDValue S = TLI.BuildVPSDIV(N, DAG, LegalOperations, Built)) { + for (SDNode *N : Built) + AddToWorklist(N); + return S; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitVPUDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue Mask = N->getOperand(2); + SDValue VL = N->getOperand(3); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + ConstantSDNode *N1C = isConstOrConstSplat(N1); + // fold (vp.udiv X, -1) -> vp.select(X == -1, 1, 0) + if (N1C && N1C->isAllOnes()) { + EVT CCVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + VT.getVectorElementCount()); + return DAG.getNode(ISD::VP_SELECT, DL, VT, + DAG.getSetCCVP(DL, CCVT, N0, N1, ISD::SETEQ, Mask, VL), + DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), + VL); + } + + if (SDValue V = visitVPUDIVLike(N0, N1, N)) { + // If the corresponding remainder node exists, update its users with + // (Dividend - (Quotient * Divisor). 
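+    // (e.g. when both vp.udiv X, 5 and vp.urem X, 5 are live, the urem's
+    //  users are rewired to the subtract built from the already-optimized
+    //  quotient rather than expanding the division a second time.)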
+ if (SDNode *RemNode = DAG.getNodeIfExists(ISD::VP_UREM, N->getVTList(), + {N0, N1, Mask, VL})) { + SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, V, N1, Mask, VL); + SDValue Sub = DAG.getNode(ISD::VP_SUB, DL, VT, N0, Mul, Mask, VL); + AddToWorklist(Mul.getNode()); + AddToWorklist(Sub.getNode()); + CombineTo(RemNode, Sub); + } + return V; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitVPUDIVLike(SDValue N0, SDValue N1, SDNode *N) { + SDLoc DL(N); + SDValue Mask = N->getOperand(2); + SDValue VL = N->getOperand(3); + EVT VT = N->getValueType(0); + + // fold (vp.udiv x, (1 << c)) -> vp.lshr(x, c) + if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && + DAG.isKnownToBeAPowerOfTwo(N1)) { + SDValue LogBase2 = BuildLogBase2(N1, DL); + AddToWorklist(LogBase2.getNode()); + + EVT ShiftVT = getShiftAmountTy(N0.getValueType()); + SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); + AddToWorklist(Trunc.getNode()); + return DAG.getNode(ISD::VP_SRL, DL, VT, N0, Trunc, Mask, VL); + } + + // fold (vp.udiv x, (vp.shl c, y)) -> vp.lshr(x, vp.add(log2(c)+y)) iff c is + // power of 2 + if (N1.getOpcode() == ISD::VP_SHL && N1->getOperand(2) == Mask && + N1->getOperand(3) == VL) { + SDValue N10 = N1.getOperand(0); + if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) && + DAG.isKnownToBeAPowerOfTwo(N10)) { + SDValue LogBase2 = BuildLogBase2(N10, DL); + AddToWorklist(LogBase2.getNode()); + + EVT ADDVT = N1.getOperand(1).getValueType(); + SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT); + AddToWorklist(Trunc.getNode()); + SDValue Add = DAG.getNode(ISD::VP_ADD, DL, ADDVT, N1.getOperand(1), Trunc, + Mask, VL); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::VP_SRL, DL, VT, N0, Add, Mask, VL); + } + } + + // fold (vp.udiv x, Splat(shl c, y)) -> vp.lshr(x, add(log2(c)+y)) iff c is + // power of 2 + if (N1.getOpcode() == ISD::SPLAT_VECTOR) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::SHL) { + SDValue N0SHL = N10.getOperand(0); + if (isa(N0SHL) && DAG.isKnownToBeAPowerOfTwo(N0SHL)) { + SDValue LogBase2 = BuildLogBase2(N0SHL, DL); + AddToWorklist(LogBase2.getNode()); + + EVT ADDVT = N10.getOperand(1).getValueType(); + SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT); + AddToWorklist(Trunc.getNode()); + SDValue Add = + DAG.getNode(ISD::ADD, DL, ADDVT, N10.getOperand(1), Trunc); + AddToWorklist(Add.getNode()); + SDValue Splat = DAG.getSplatVector(VT, DL, Add); + AddToWorklist(Splat.getNode()); + return DAG.getNode(ISD::VP_SRL, DL, VT, N0, Splat, Mask, VL); + } + } + } + + // fold (udiv x, c) -> alternate + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); + if (isConstantOrConstantVector(N1) && + !TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue Op = BuildVPUDIV(N)) + return Op; + return SDValue(); +} + +SDValue DAGCombiner::visitVPSDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue Mask = N->getOperand(2); + SDValue VL = N->getOperand(3); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // fold (vp.sdiv X, -1) -> 0-X + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (N1C && N1C->isAllOnes()) + return DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT), N0, + Mask, VL); + + // fold (vp.sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0) + if (N1C && N1C->getAPIntValue().isMinSignedValue()) { + EVT CCVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + VT.getVectorElementCount()); + return DAG.getNode(ISD::VP_SELECT, DL, VT, + DAG.getSetCCVP(DL, CCVT, N0, N1, 
ISD::SETEQ, Mask, VL), + DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), + VL); + } + + // If we know the sign bits of both operands are zero, strength reduce to a + // vp.udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::VP_UDIV, DL, N1.getValueType(), N0, N1, Mask, VL); + + if (SDValue V = visitVPSDIVLike(N0, N1, N)) { + // If the corresponding remainder node exists, update its users with + // (Dividend - (Quotient * Divisor). + if (SDNode *RemNode = DAG.getNodeIfExists(ISD::VP_SREM, N->getVTList(), + {N0, N1, Mask, VL})) { + SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, V, N1, Mask, VL); + SDValue Sub = DAG.getNode(ISD::VP_SUB, DL, VT, N0, Mul, Mask, VL); + AddToWorklist(Mul.getNode()); + AddToWorklist(Sub.getNode()); + CombineTo(RemNode, Sub); + } + return V; + } + return SDValue(); +} + +SDValue DAGCombiner::visitVPSDIVLike(SDValue N0, SDValue N1, SDNode *N) { + SDLoc DL(N); + SDValue Mask = N->getOperand(2); + SDValue VL = N->getOperand(3); + EVT VT = N->getValueType(0); + unsigned BitWidth = VT.getScalarSizeInBits(); + + // fold (vp.sdiv X, V of pow 2) + if (N1.getOpcode() == ISD::SPLAT_VECTOR && + isDivisorPowerOfTwo(N1.getOperand(0))) { + // Create constants that are functions of the shift amount value. + SDValue N = N1.getOperand(0); + EVT CCVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + VT.getVectorElementCount()); + EVT ScalarShiftAmtTy = + getShiftAmountTy(N0.getValueType().getVectorElementType()); + SDValue Bits = DAG.getConstant(BitWidth, DL, ScalarShiftAmtTy); + SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT.getVectorElementType(), N); + C1 = DAG.getZExtOrTrunc(C1, DL, ScalarShiftAmtTy); + SDValue Inexact = DAG.getNode(ISD::SUB, DL, ScalarShiftAmtTy, Bits, C1); + if (!isa(Inexact)) + return SDValue(); + + // Splat the sign bit into the register + EVT VecShiftAmtTy = EVT::getVectorVT(*DAG.getContext(), ScalarShiftAmtTy, + VT.getVectorElementCount()); + SDValue Sign = + DAG.getNode(ISD::VP_SRA, DL, VT, N0, + DAG.getConstant(BitWidth - 1, DL, VecShiftAmtTy), Mask, VL); + AddToWorklist(Sign.getNode()); + + // Add N0, ((N0 < 0) ? abs(N1) - 1 : 0); + Inexact = DAG.getSplat(VT, DL, Inexact); + C1 = DAG.getSplat(VT, DL, C1); + SDValue Srl = DAG.getNode(ISD::VP_SRL, DL, VT, Sign, Inexact, Mask, VL); + AddToWorklist(Srl.getNode()); + SDValue Add = DAG.getNode(ISD::VP_ADD, DL, VT, N0, Srl, Mask, VL); + AddToWorklist(Add.getNode()); + SDValue Sra = DAG.getNode(ISD::VP_SRA, DL, VT, Add, C1, Mask, VL); + AddToWorklist(Sra.getNode()); + + // Special case: (sdiv X, 1) -> X + // Special Case: (sdiv X, -1) -> 0-X + SDValue One = DAG.getConstant(1, DL, VT); + SDValue AllOnes = DAG.getAllOnesConstant(DL, VT); + SDValue IsOne = DAG.getSetCCVP(DL, CCVT, N1, One, ISD::SETEQ, Mask, VL); + SDValue IsAllOnes = + DAG.getSetCCVP(DL, CCVT, N1, AllOnes, ISD::SETEQ, Mask, VL); + SDValue IsOneOrAllOnes = + DAG.getNode(ISD::VP_OR, DL, CCVT, IsOne, IsAllOnes, Mask, VL); + Sra = DAG.getNode(ISD::VP_SELECT, DL, VT, IsOneOrAllOnes, N0, Sra, VL); + + // If dividing by a positive value, we're done. Otherwise, the result must + // be negated. + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue Sub = DAG.getNode(ISD::VP_SUB, DL, VT, Zero, Sra, Mask, VL); + + // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding. 
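+    // (e.g. a splat divisor of -4 computes the divide-by-4 sequence above
+    //  and then negates it; the negation is selected per the sign of the
+    //  divisor below.)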
+ SDValue IsNeg = DAG.getSetCCVP(DL, CCVT, N1, Zero, ISD::SETLT, Mask, VL); + SDValue Res = DAG.getNode(ISD::VP_SELECT, DL, VT, IsNeg, Sub, Sra, VL); + return Res; + } + + // If integer divide is expensive and we satisfy the requirements, emit an + // alternate sequence. Targets may check function attributes for size/speed + // trade-offs. + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); + if (isConstantOrConstantVector(N1) && + !TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue Op = BuildVPSDIV(N)) + return Op; + + return SDValue(); +} + SDValue DAGCombiner::visitVPOp(SDNode *N) { if (N->getOpcode() == ISD::VP_GATHER) @@ -27262,6 +27585,13 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) { return visitMUL(N); case ISD::VP_SUB: return foldSubCtlzNot(N, DAG); + case ISD::VP_UDIV: + return visitVPUDIV(N); + case ISD::VP_SDIV: + return visitVPSDIV(N); + case ISD::VP_UREM: + case ISD::VP_SREM: + return visitVPREM(N); default: break; } diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index adfb96041c5c0..82a2500ff386d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6492,6 +6492,121 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::ADD, dl, VT, Q, T); } +/// Given an ISD::VP_SDIV node expressing a divide by constant, +/// return a DAG expression to select that will generate the same value by +/// multiplying by a magic number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". +SDValue TargetLowering::BuildVPSDIV(SDNode *N, SelectionDAG &DAG, + bool IsAfterLegalization, + SmallVectorImpl &Created) const { + SDLoc DL(N); + EVT VT = N->getValueType(0); + EVT SVT = VT.getScalarType(); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); + unsigned EltBits = VT.getScalarSizeInBits(); + + // Check to see if we can do this. + if (!isTypeLegal(VT)) + return SDValue(); + + SmallVector MagicFactors, Factors, Shifts, ShiftMasks; + + auto BuildSDIVPattern = [&](ConstantSDNode *C) { + if (C->isZero()) + return false; + + const APInt &Divisor = C->getAPIntValue(); + SignedDivisionByConstantInfo magics = + SignedDivisionByConstantInfo::get(Divisor); + int NumeratorFactor = 0; + int ShiftMask = -1; + + if (Divisor.isOne() || Divisor.isAllOnes()) { + // If d is +1/-1, we just multiply the numerator by +1/-1. + NumeratorFactor = Divisor.getSExtValue(); + magics.Magic = 0; + magics.ShiftAmount = 0; + ShiftMask = 0; + } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) { + // If d > 0 and m < 0, add the numerator. + NumeratorFactor = 1; + } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) { + // If d < 0 and m > 0, subtract the numerator. + NumeratorFactor = -1; + } + + MagicFactors.push_back(DAG.getConstant(magics.Magic, DL, SVT)); + Factors.push_back(DAG.getSignedConstant(NumeratorFactor, DL, SVT)); + Shifts.push_back(DAG.getConstant(magics.ShiftAmount, DL, ShSVT)); + ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, DL, SVT)); + return true; + }; + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue Mask = N->getOperand(2); + SDValue VL = N->getOperand(3); + + // Collect the shifts / magic values from each element. 
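+  // (For a splat i32 divisor of 7 this produces the classic signed-magic
+  //  values from Hacker's Delight: MagicFactor = 0x92492493, Factor = 1,
+  //  Shift = 2, ShiftMask = -1.)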
+ if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern)) + return SDValue(); + + SDValue MagicFactor, Factor, Shift, ShiftMask; + if (N1.getOpcode() == ISD::BUILD_VECTOR) { + MagicFactor = DAG.getBuildVector(VT, DL, MagicFactors); + Factor = DAG.getBuildVector(VT, DL, Factors); + Shift = DAG.getBuildVector(ShVT, DL, Shifts); + ShiftMask = DAG.getBuildVector(VT, DL, ShiftMasks); + } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) { + assert(MagicFactors.size() == 1 && Factors.size() == 1 && + Shifts.size() == 1 && ShiftMasks.size() == 1 && + "Expected matchUnaryPredicate to return one element for scalable " + "vectors"); + MagicFactor = DAG.getSplatVector(VT, DL, MagicFactors[0]); + Factor = DAG.getSplatVector(VT, DL, Factors[0]); + Shift = DAG.getSplatVector(ShVT, DL, Shifts[0]); + ShiftMask = DAG.getSplatVector(VT, DL, ShiftMasks[0]); + } else { + assert(isa(N1) && "Expected a constant"); + MagicFactor = MagicFactors[0]; + Factor = Factors[0]; + Shift = Shifts[0]; + ShiftMask = ShiftMasks[0]; + } + + // Multiply the numerator (operand 0) by the magic value. + auto GetMULHS = [&](SDValue X, SDValue Y) { + if (isOperationLegalOrCustom(ISD::VP_MULHS, VT, IsAfterLegalization)) + return DAG.getNode(ISD::VP_MULHS, DL, VT, X, Y, Mask, VL); + return SDValue(); + }; + + SDValue Q = GetMULHS(N0, MagicFactor); + if (!Q) + return SDValue(); + + Created.push_back(Q.getNode()); + + // (Optionally) Add/subtract the numerator using Factor. + Factor = DAG.getNode(ISD::VP_MUL, DL, VT, N0, Factor, Mask, VL); + Created.push_back(Factor.getNode()); + Q = DAG.getNode(ISD::VP_ADD, DL, VT, Q, Factor, Mask, VL); + Created.push_back(Q.getNode()); + + // Shift right algebraic by shift value. + Q = DAG.getNode(ISD::VP_SRA, DL, VT, Q, Shift, Mask, VL); + Created.push_back(Q.getNode()); + + // Extract the sign bit, mask it and add it to the quotient. + SDValue SignShift = DAG.getConstant(EltBits - 1, DL, ShVT); + SDValue T = DAG.getNode(ISD::VP_SRL, DL, VT, Q, SignShift, Mask, VL); + Created.push_back(T.getNode()); + T = DAG.getNode(ISD::VP_AND, DL, VT, T, ShiftMask, Mask, VL); + Created.push_back(T.getNode()); + return DAG.getNode(ISD::VP_ADD, DL, VT, Q, T, Mask, VL); +} + /// Given an ISD::UDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. @@ -6692,6 +6807,144 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, return DAG.getSelect(dl, VT, IsOne, N0, Q); } +/// Given an ISD::VP_UDIV node expressing a divide by constant, +/// return a DAG expression to select that will generate the same value by +/// multiplying by a magic number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". +SDValue TargetLowering::BuildVPUDIV(SDNode *N, SelectionDAG &DAG, + bool IsAfterLegalization, + SmallVectorImpl &Created) const { + SDLoc DL(N); + EVT VT = N->getValueType(0); + EVT SVT = VT.getScalarType(); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); + unsigned EltBits = VT.getScalarSizeInBits(); + + // Check to see if we can do this. + if (!isTypeLegal(VT)) + return SDValue(); + + bool UseNPQ = false, UsePreShift = false, UsePostShift = false; + + SmallVector PreShifts, PostShifts, MagicFactors, NPQFactors; + + auto BuildUDIVPattern = [&](ConstantSDNode *C) { + if (C->isZero()) + return false; + // FIXME: We should use a narrower constant when the upper + // bits are known to be zero. 
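+    // (For a splat i32 divisor of 7 this is the NPQ case: Magic = 0x24924925
+    //  with IsAdd set and PostShift = 2, computed below as
+    //  q = mulhu(n, M); q = (((n - q) >> 1) + q) >> 2.)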
+ const APInt &Divisor = C->getAPIntValue(); + SDValue PreShift, MagicFactor, NPQFactor, PostShift; + + // Magic algorithm doesn't work for division by 1. We need to emit a select + // at the end. + if (Divisor.isOne()) { + PreShift = PostShift = DAG.getUNDEF(ShSVT); + MagicFactor = NPQFactor = DAG.getUNDEF(SVT); + } else { + UnsignedDivisionByConstantInfo magics = + UnsignedDivisionByConstantInfo::get(Divisor); + + MagicFactor = DAG.getConstant(magics.Magic, DL, SVT); + + assert(magics.PreShift < Divisor.getBitWidth() && + "We shouldn't generate an undefined shift!"); + assert(magics.PostShift < Divisor.getBitWidth() && + "We shouldn't generate an undefined shift!"); + assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift"); + PreShift = DAG.getConstant(magics.PreShift, DL, ShSVT); + PostShift = DAG.getConstant(magics.PostShift, DL, ShSVT); + NPQFactor = DAG.getConstant( + magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1) + : APInt::getZero(EltBits), + DL, SVT); + UseNPQ |= magics.IsAdd; + UsePreShift |= magics.PreShift != 0; + UsePostShift |= magics.PostShift != 0; + } + + PreShifts.push_back(PreShift); + MagicFactors.push_back(MagicFactor); + NPQFactors.push_back(NPQFactor); + PostShifts.push_back(PostShift); + return true; + }; + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue Mask = N->getOperand(2); + SDValue VL = N->getOperand(3); + + // Collect the shifts/magic values from each element. + if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern)) + return SDValue(); + + SDValue PreShift, PostShift, MagicFactor, NPQFactor; + if (N1.getOpcode() == ISD::BUILD_VECTOR) { + PreShift = DAG.getBuildVector(ShVT, DL, PreShifts); + MagicFactor = DAG.getBuildVector(VT, DL, MagicFactors); + NPQFactor = DAG.getBuildVector(VT, DL, NPQFactors); + PostShift = DAG.getBuildVector(ShVT, DL, PostShifts); + } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) { + assert(PreShifts.size() == 1 && MagicFactors.size() == 1 && + NPQFactors.size() == 1 && PostShifts.size() == 1 && + "Expected matchUnaryPredicate to return one for scalable vectors"); + PreShift = DAG.getSplatVector(ShVT, DL, PreShifts[0]); + MagicFactor = DAG.getSplatVector(VT, DL, MagicFactors[0]); + NPQFactor = DAG.getSplatVector(VT, DL, NPQFactors[0]); + PostShift = DAG.getSplatVector(ShVT, DL, PostShifts[0]); + } else { + assert(isa(N1) && "Expected a constant"); + PreShift = PreShifts[0]; + MagicFactor = MagicFactors[0]; + PostShift = PostShifts[0]; + } + + SDValue Q = N0; + if (UsePreShift) { + Q = DAG.getNode(ISD::VP_SRL, DL, VT, Q, PreShift, Mask, VL); + Created.push_back(Q.getNode()); + } + + auto GetMULHU = [&](SDValue X, SDValue Y) { + if (isOperationLegalOrCustom(ISD::VP_MULHU, VT, IsAfterLegalization)) + return DAG.getNode(ISD::VP_MULHU, DL, VT, X, Y, Mask, VL); + return SDValue(); + }; + + // Multiply the numerator (operand 0) by the magic value. + Q = GetMULHU(Q, MagicFactor); + if (!Q) + return SDValue(); + + Created.push_back(Q.getNode()); + + if (UseNPQ) { + SDValue NPQ = DAG.getNode(ISD::VP_SUB, DL, VT, N0, Q, Mask, VL); + Created.push_back(NPQ.getNode()); + + // For vectors we might have a mix of non-NPQ/NPQ paths, so use + // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero. 
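+    // (mulhu(X, 1 << (EltBits - 1)) == X >> 1 and mulhu(X, 0) == 0, so the
+    //  per-lane NPQFactor built above selects between the >>1 fixup and a
+    //  no-op.)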
+ NPQ = GetMULHU(NPQ, NPQFactor); + Created.push_back(NPQ.getNode()); + + Q = DAG.getNode(ISD::VP_ADD, DL, VT, NPQ, Q, Mask, VL); + Created.push_back(Q.getNode()); + } + + if (UsePostShift) { + Q = DAG.getNode(ISD::VP_SRL, DL, VT, Q, PostShift, Mask, VL); + Created.push_back(Q.getNode()); + } + + EVT SetCCVT = + EVT::getVectorVT(*DAG.getContext(), MVT::i1, VT.getVectorElementCount()); + SDValue One = DAG.getConstant(1, DL, VT); + SDValue IsOne = DAG.getSetCCVP(DL, SetCCVT, N1, One, ISD::SETEQ, Mask, VL); + return DAG.getNode(ISD::VP_SELECT, DL, VT, IsOne, N0, Q, VL); +} + /// If all values in Values that *don't* match the predicate are same 'splat' /// value, then replace all values with that splat value. /// Else, if AlternativeReplacement was provided, then replace all values that diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 3a4f1fefa9445..72b6ba0c2d8ce 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -871,6 +871,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(IntegerVPOps, VT, Custom); + // Zve64* does not support VP_MULHU/S with nxvXi64. + if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) { + setOperationAction({ISD::VP_MULHU, ISD::VP_MULHS}, VT, Expand); + } + setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, @@ -1300,6 +1305,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(IntegerVPOps, VT, Custom); + // Zve64* does not support VP_MULHU/S with nxvXi64. + if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) { + setOperationAction({ISD::VP_MULHU, ISD::VP_MULHS}, VT, Expand); + } + if (Subtarget.hasStdExtZvkb()) setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom); diff --git a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll new file mode 100644 index 0000000000000..f78a0ec7f2378 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll @@ -0,0 +1,1540 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s + +declare @llvm.vp.udiv.nxv8i8(, , , i32) +declare @llvm.vp.udiv.nxv4i16(, , , i32) +declare @llvm.vp.udiv.nxv2i32(, , , i32) +declare @llvm.vp.udiv.nxv1i64(, , , i32) +declare @llvm.vp.sdiv.nxv8i8(, , , i32) +declare @llvm.vp.sdiv.nxv4i16(, , , i32) +declare @llvm.vp.sdiv.nxv2i32(, , , i32) +declare @llvm.vp.sdiv.nxv1i64(, , , i32) +declare @llvm.vp.urem.nxv8i8(, , , i32) +declare @llvm.vp.urem.nxv4i16(, , , i32) +declare @llvm.vp.urem.nxv2i32(, , , i32) +declare @llvm.vp.urem.nxv1i64(, , , i32) +declare @llvm.vp.srem.nxv8i8(, , , i32) +declare @llvm.vp.srem.nxv4i16(, , , i32) +declare @llvm.vp.srem.nxv2i32(, , , i32) +declare @llvm.vp.srem.nxv1i64(, , , i32) +declare @llvm.vp.shl.nxv8i8(, , , i32) +declare @llvm.vp.shl.nxv4i16(, , , i32) +declare @llvm.vp.shl.nxv2i32(, , , i32) +declare @llvm.vp.shl.nxv1i64(, , , i32) + + +define @vpudiv_by_max_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_max_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 
+; CHECK-NEXT: ret + %vec = insertelement undef, i8 255, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv8i8( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_max_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_max_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i16 65535, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv4i16( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_max_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_max_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i32 4294967295, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv2i32( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_max_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_max_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i64 18446744073709551615, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv1i64( %va, %splat, %m, i32 %evl) + ret %v +} + +define @fold_vpudiv_vpurem_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: fold_vpudiv_vpurem_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t +; CHECK-NEXT: li a0, -128 +; CHECK-NEXT: vmul.vx v10, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: ret + %vec = insertelement undef, i8 128, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv8i8( %va, %splat, %m, i32 %evl) + %u = call @llvm.vp.urem.nxv8i8( %va, %splat, %m, i32 %evl) + %x = add %v, %u + ret %x +} + +define @fold_vpudiv_vpurem_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: fold_vpudiv_vpurem_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 14, v0.t +; CHECK-NEXT: lui a0, 4 +; CHECK-NEXT: vmul.vx v10, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: ret + %vec = insertelement undef, i16 16384, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv4i16( %va, %splat, %m, i32 %evl) + %u = call @llvm.vp.urem.nxv4i16( %va, %splat, %m, i32 %evl) + %x = add %v, %u + ret %x +} + +define @fold_vpudiv_vpurem_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: fold_vpudiv_vpurem_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 14, v0.t +; CHECK-NEXT: lui a0, 4 +; CHECK-NEXT: 
vmul.vx v10, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: ret + %vec = insertelement undef, i32 16384, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv2i32( %va, %splat, %m, i32 %evl) + %u = call @llvm.vp.urem.nxv2i32( %va, %splat, %m, i32 %evl) + %x = add %v, %u + ret %x +} + +define @fold_vpudiv_vpurem_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: fold_vpudiv_vpurem_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 14, v0.t +; CHECK-NEXT: lui a0, 4 +; CHECK-NEXT: vmul.vx v10, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: ret + %vec = insertelement undef, i64 16384, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv1i64( %va, %splat, %m, i32 %evl) + %u = call @llvm.vp.urem.nxv1i64( %va, %splat, %m, i32 %evl) + %x = add %v, %u + ret %x +} + +define @vpudiv_by_shl2_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_shl2_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: andi a0, a0, 255 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %sh = shl i8 2, %b + %vec = insertelement undef, i8 %sh, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv8i8( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_shl2_nxv4i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_shl2_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 48 +; CHECK-NEXT: srli a0, a0, 48 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %sh = shl i16 2, %b + %vec = insertelement undef, i16 %sh, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv4i16( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_shl2_nxv2i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_shl2_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %sh = shl i32 2, %b + %vec = insertelement undef, i32 %sh, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv2i32( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_shl2_nxv1i64( %va, i64 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_shl2_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, a0, 1 +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %sh = shl i64 2, %b + %vec = insertelement undef, i64 %sh, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv1i64( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_vpshl2_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_vpshl2_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t +; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec1 = insertelement undef, i8 4, i32 0 + %splat1 = shufflevector %vec1, 
poison, zeroinitializer + %vec2 = insertelement undef, i8 %b, i32 0 + %splat2 = shufflevector %vec2, poison, zeroinitializer + %sh = call @llvm.vp.shl.nxv8i8( %splat1, %splat2, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv8i8( %va, %sh, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_vpshl2_nxv4i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_vpshl2_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t +; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec1 = insertelement undef, i16 4, i32 0 + %splat1 = shufflevector %vec1, poison, zeroinitializer + %vec2 = insertelement undef, i16 %b, i32 0 + %splat2 = shufflevector %vec2, poison, zeroinitializer + %sh = call @llvm.vp.shl.nxv4i16( %splat1, %splat2, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv4i16( %va, %sh, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_vpshl2_nxv2i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_vpshl2_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t +; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec1 = insertelement undef, i32 4, i32 0 + %splat1 = shufflevector %vec1, poison, zeroinitializer + %vec2 = insertelement undef, i32 %b, i32 0 + %splat2 = shufflevector %vec2, poison, zeroinitializer + %sh = call @llvm.vp.shl.nxv2i32( %splat1, %splat2, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv2i32( %va, %sh, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_vpshl2_nxv1i64( %va, i64 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_vpshl2_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t +; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec1 = insertelement undef, i64 4, i32 0 + %splat1 = shufflevector %vec1, poison, zeroinitializer + %vec2 = insertelement undef, i64 %b, i32 0 + %splat2 = shufflevector %vec2, poison, zeroinitializer + %sh = call @llvm.vp.shl.nxv1i64( %splat1, %splat2, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv1i64( %va, %sh, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_const_no_add_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_const_no_add_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 5 +; CHECK-NEXT: li a1, -51 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t +; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t +; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i8 5, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv8i8( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_const_no_add_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_const_no_add_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 5 +; CHECK-NEXT: lui a1, 1048573 +; CHECK-NEXT: addiw a1, a1, -819 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t +; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t +; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: ret + %vec = insertelement 
undef, i16 5, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv4i16( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_const_no_add_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_const_no_add_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 5 +; CHECK-NEXT: lui a1, 838861 +; CHECK-NEXT: addiw a1, a1, -819 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t +; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t +; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i32 5, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv2i32( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_const_no_add_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_const_no_add_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI19_0) +; CHECK-NEXT: ld a1, %lo(.LCPI19_0)(a1) +; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 5 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t +; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t +; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i64 5, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv1i64( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_const_with_add_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_const_with_add_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 7 +; CHECK-NEXT: li a1, 37 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t +; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t +; CHECK-NEXT: li a0, -128 +; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t +; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t +; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i8 7, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv8i8( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_const_with_add_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_const_with_add_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 7 +; CHECK-NEXT: lui a1, 2 +; CHECK-NEXT: addiw a1, a1, 1171 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t +; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t +; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t +; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t +; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i16 7, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv4i16( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_const_with_add_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_const_with_add_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 7 +; CHECK-NEXT: lui a1, 149797 +; CHECK-NEXT: addiw a1, a1, -1755 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmulhu.vx v10, v8, 
a1, v0.t +; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t +; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t +; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i32 7, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv2i32( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_const_with_add_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_const_with_add_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI23_0) +; CHECK-NEXT: ld a1, %lo(.LCPI23_0)(a1) +; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 7 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t +; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: slli a0, a0, 63 +; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t +; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t +; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t +; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i64 7, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.udiv.nxv1i64( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_by_neg1_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_by_neg1_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i8 -1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv8i8( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_by_neg1_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_by_neg1_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i64 -1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_by_neg1_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_by_neg1_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i16 -1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv4i16( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_by_neg1_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_by_neg1_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i32 -1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv2i32( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_by_min_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_by_min_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -128 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmseq.vx v0, v8, a1, v0.t +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i8 -128, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv8i8( %va, %splat, %m, 
i32 %evl) + ret %v +} + +define @vpsdiv_by_min_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_by_min_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: slli a1, a1, 63 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmseq.vx v0, v8, a1, v0.t +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i64 -9223372036854775808, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_by_min_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_by_min_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 1048568 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmseq.vx v0, v8, a1, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i16 -32768, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv4i16( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_by_min_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_by_min_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 524288 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmseq.vx v0, v8, a1, v0.t +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i32 -2147483648, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv2i32( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_pow2_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_pow2_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v10, 4 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vmor.mm v10, v10, v11 +; CHECK-NEXT: vsra.vi v11, v8, 15, v0.t +; CHECK-NEXT: vsrl.vi v11, v11, 14, v0.t +; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t +; CHECK-NEXT: vsra.vi v11, v11, 2, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsgt.vi v0, v11, 4, v0.t +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i16 4, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv4i16( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_pow2_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_pow2_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v10, 4 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vmor.mm v10, v10, v11 +; CHECK-NEXT: vsra.vi v11, v8, 7, v0.t +; CHECK-NEXT: vsrl.vi v11, v11, 6, v0.t +; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t +; CHECK-NEXT: 
vsra.vi v11, v11, 2, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsgt.vi v0, v11, 4, v0.t +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i8 4, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv8i8( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_pow2_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_pow2_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v10, 4 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vmor.mm v10, v10, v11 +; CHECK-NEXT: vsra.vi v11, v8, 31, v0.t +; CHECK-NEXT: vsrl.vi v11, v11, 30, v0.t +; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t +; CHECK-NEXT: vsra.vi v11, v11, 2, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsgt.vi v0, v11, 4, v0.t +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i32 4, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv2i32( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_pow2_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_pow2_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.i v10, 4 +; CHECK-NEXT: li a1, 63 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsra.vx v11, v8, a1, v0.t +; CHECK-NEXT: li a1, 62 +; CHECK-NEXT: vsrl.vx v11, v11, a1, v0.t +; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t +; CHECK-NEXT: vsra.vi v11, v11, 2, v0.t +; CHECK-NEXT: vmseq.vi v12, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmsgt.vi v0, v11, 4, v0.t +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i64 4, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_const_no_ashr_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_const_no_ashr_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t +; CHECK-NEXT: li a0, 86 +; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t +; CHECK-NEXT: vand.vi v9, v9, -1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i8 3, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv8i8( %va, %splat, %m, i32 %evl) + ret %v +} + +define 
@vpsdiv_const_no_ashr_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_const_no_ashr_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addiw a0, a0, 1366 +; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 15, v0.t +; CHECK-NEXT: vand.vi v9, v9, -1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i16 3, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv4i16( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_const_no_ashr_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_const_no_ashr_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addiw a0, a0, 1366 +; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 31, v0.t +; CHECK-NEXT: vand.vi v9, v9, -1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i32 3, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv2i32( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_const_no_ashr_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_const_no_ashr_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI39_0) +; CHECK-NEXT: ld a1, %lo(.LCPI39_0)(a1) +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t +; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vsrl.vx v9, v8, a0, v0.t +; CHECK-NEXT: vand.vi v9, v9, -1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i64 3, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_const_ashr_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_const_ashr_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t +; CHECK-NEXT: li a0, 103 +; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t +; CHECK-NEXT: vand.vi v9, v9, -1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i8 5, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv8i8( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_const_ashr_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_const_ashr_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t +; CHECK-NEXT: lui a0, 6 +; CHECK-NEXT: addiw a0, a0, 1639 +; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 15, v0.t +; CHECK-NEXT: vand.vi v9, v9, -1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i16 5, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv4i16( %va, %splat, %m, i32 
%evl) + ret %v +} + +define @vpsdiv_const_ashr_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_const_ashr_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t +; CHECK-NEXT: lui a0, 419430 +; CHECK-NEXT: addiw a0, a0, 1639 +; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 31, v0.t +; CHECK-NEXT: vand.vi v9, v9, -1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i32 5, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv2i32( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_const_ashr_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_const_ashr_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI43_0) +; CHECK-NEXT: ld a1, %lo(.LCPI43_0)(a1) +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t +; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vsrl.vx v9, v8, a0, v0.t +; CHECK-NEXT: vand.vi v9, v9, -1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i64 5, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_const_add_ashr_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_const_add_ashr_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t +; CHECK-NEXT: li a0, -109 +; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 2, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t +; CHECK-NEXT: vand.vi v9, v9, -1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i8 7, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv8i8( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_const_add_ashr_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_const_add_ashr_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t +; CHECK-NEXT: lui a0, 1048569 +; CHECK-NEXT: addiw a0, a0, -1911 +; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 3, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 15, v0.t +; CHECK-NEXT: vand.vi v9, v9, -1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i16 15, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv4i16( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_const_add_ashr_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_const_add_ashr_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t +; CHECK-NEXT: lui a0, 599186 +; CHECK-NEXT: addiw a0, a0, 1171 +; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 2, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 31, v0.t +; CHECK-NEXT: vand.vi v9, v9, -1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i32 7, i32 0 
+ %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv2i32( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_const_add_ashr_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_const_add_ashr_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI47_0) +; CHECK-NEXT: ld a1, %lo(.LCPI47_0)(a1) +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: vmul.vx v9, v8, a0, v0.t +; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 3, v0.t +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vsrl.vx v9, v8, a0, v0.t +; CHECK-NEXT: vand.vi v9, v9, -1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i64 15, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_const_sub_ashr_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t +; CHECK-NEXT: li a0, 109 +; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 2, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t +; CHECK-NEXT: vand.vi v9, v9, -1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i8 -7, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv8i8( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_const_sub_ashr_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t +; CHECK-NEXT: lui a0, 7 +; CHECK-NEXT: addiw a0, a0, 1911 +; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 3, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 15, v0.t +; CHECK-NEXT: vand.vi v9, v9, -1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i16 -15, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv4i16( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_const_sub_ashr_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t +; CHECK-NEXT: lui a0, 449390 +; CHECK-NEXT: addiw a0, a0, -1171 +; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 2, v0.t +; CHECK-NEXT: vsrl.vi v9, v8, 31, v0.t +; CHECK-NEXT: vand.vi v9, v9, -1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i32 -7, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv2i32( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_const_sub_ashr_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI51_0) +; CHECK-NEXT: ld a1, %lo(.LCPI51_0)(a1) +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: vmul.vx v9, v8, a0, v0.t +; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, 
v0.t +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vsrl.vx v9, v8, a0, v0.t +; CHECK-NEXT: vand.vi v9, v9, -1, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i64 -3, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpurem_by_max_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpurem_by_max_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i64 18446744073709551615, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.urem.nxv1i64( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpurem_by_max_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpurem_by_max_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i16 65535, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.urem.nxv4i16( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpurem_by_max_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpurem_by_max_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i8 255, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.urem.nxv8i8( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpurem_by_max_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpurem_by_max_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmseq.vi v0, v8, -1, v0.t +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: ret + %vec = insertelement undef, i32 4294967295, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.urem.nxv2i32( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpurem_by_const_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpurem_by_const_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: lui a1, %hi(.LCPI56_0) +; CHECK-NEXT: ld a1, %lo(.LCPI56_0)(a1) +; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.i v10, 5 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmulhu.vx v11, v8, a1, v0.t +; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t +; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t +; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0 +; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmul.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i64 5, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.urem.nxv1i64( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpurem_by_const_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpurem_by_const_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v10, 5 +; CHECK-NEXT: lui a1, 1048573 +; CHECK-NEXT: addiw a1, a1, -819 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmulhu.vx v11, v8, a1, v0.t +; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t +; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t +; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0 +; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: 
vmv1r.v v0, v9 +; CHECK-NEXT: vmul.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i16 5, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.urem.nxv4i16( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpurem_by_const_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpurem_by_const_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v10, 5 +; CHECK-NEXT: li a1, -51 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmulhu.vx v11, v8, a1, v0.t +; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t +; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t +; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0 +; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmul.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i8 5, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.urem.nxv8i8( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpurem_by_const_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpurem_by_const_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v10, 5 +; CHECK-NEXT: lui a1, 838861 +; CHECK-NEXT: addiw a1, a1, -819 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmulhu.vx v11, v8, a1, v0.t +; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t +; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t +; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0 +; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmul.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i32 5, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.urem.nxv2i32( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsrem_by_const_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsrem_by_const_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI60_0) +; CHECK-NEXT: ld a1, %lo(.LCPI60_0)(a1) +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t +; CHECK-NEXT: vmulh.vx v10, v8, a1, v0.t +; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vsrl.vx v10, v9, a0, v0.t +; CHECK-NEXT: vand.vi v10, v10, -1, v0.t +; CHECK-NEXT: vadd.vv v9, v9, v10, v0.t +; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: vmul.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i64 5, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.srem.nxv1i64( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsrem_by_const_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsrem_by_const_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t +; CHECK-NEXT: lui a0, 6 +; CHECK-NEXT: addiw a0, a0, 1639 +; CHECK-NEXT: vmulh.vx v10, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsrl.vi v10, v9, 15, v0.t +; CHECK-NEXT: vand.vi v10, v10, -1, v0.t +; CHECK-NEXT: vadd.vv v9, v9, v10, v0.t +; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: vmul.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i16 5, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call 
@llvm.vp.srem.nxv4i16( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsrem_by_const_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsrem_by_const_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t +; CHECK-NEXT: li a0, 103 +; CHECK-NEXT: vmulh.vx v10, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsrl.vi v10, v9, 7, v0.t +; CHECK-NEXT: vand.vi v10, v10, -1, v0.t +; CHECK-NEXT: vadd.vv v9, v9, v10, v0.t +; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: vmul.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i8 5, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.srem.nxv8i8( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsrem_by_const_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsrem_by_const_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t +; CHECK-NEXT: lui a0, 419430 +; CHECK-NEXT: addiw a0, a0, 1639 +; CHECK-NEXT: vmulh.vx v10, v8, a0, v0.t +; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsrl.vi v10, v9, 31, v0.t +; CHECK-NEXT: vand.vi v10, v10, -1, v0.t +; CHECK-NEXT: vadd.vv v9, v9, v10, v0.t +; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: vmul.vx v9, v9, a0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %vec = insertelement undef, i32 5, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.srem.nxv2i32( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_1_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_1_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %vec = insertelement undef, i8 1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv8i8( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_1_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_1_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %vec = insertelement undef, i16 1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv4i16( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_1_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_1_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %vec = insertelement undef, i32 1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv2i32( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpudiv_by_1_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpudiv_by_1_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %vec = insertelement undef, i64 1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_by_1_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_by_1_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %vec = insertelement undef, i8 1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv8i8( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_by_1_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_by_1_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %vec = insertelement undef, i16 1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv4i16( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_by_1_nxv2i32( %va, %m, i32 zeroext 
%evl) { +; CHECK-LABEL: vpsdiv_by_1_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %vec = insertelement undef, i32 1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv2i32( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsdiv_by_1_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsdiv_by_1_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %vec = insertelement undef, i64 1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpurem_by_1_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpurem_by_1_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: ret + %vec = insertelement undef, i8 1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.urem.nxv8i8( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpurem_by_1_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpurem_by_1_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: ret + %vec = insertelement undef, i16 1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.urem.nxv4i16( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpurem_by_1_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpurem_by_1_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: ret + %vec = insertelement undef, i32 1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.urem.nxv2i32( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpurem_by_1_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpurem_by_1_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: ret + %vec = insertelement undef, i64 1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.urem.nxv1i64( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsrem_by_1_nxv8i8( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsrem_by_1_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: ret + %vec = insertelement undef, i8 1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.srem.nxv8i8( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsrem_by_1_nxv4i16( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsrem_by_1_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: ret + %vec = insertelement undef, i16 1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.srem.nxv4i16( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsrem_by_1_nxv2i32( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsrem_by_1_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: ret + %vec = insertelement undef, i32 1, i32 0 + %splat = shufflevector %vec, poison, zeroinitializer + %v = call @llvm.vp.srem.nxv2i32( %va, %splat, %m, i32 %evl) + ret %v +} + +define @vpsrem_by_1_nxv1i64( %va, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpsrem_by_1_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: ret + %vec = insertelement undef, i64 1, i32 0 + %splat = shufflevector %vec, poison, 
zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 8 x i8> @vpsrem_by_neg1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsrem_by_neg1_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    ret
+  %vec = insertelement <vscale x 8 x i8> undef, i8 -1, i32 0
+  %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 4 x i16> @vpsrem_by_neg1_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsrem_by_neg1_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    ret
+  %vec = insertelement <vscale x 4 x i16> undef, i16 -1, i32 0
+  %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.srem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 2 x i32> @vpsrem_by_neg1_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsrem_by_neg1_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    ret
+  %vec = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
+  %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.srem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 1 x i64> @vpsrem_by_neg1_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsrem_by_neg1_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    ret
+  %vec = insertelement <vscale x 1 x i64> undef, i64 -1, i32 0
+  %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 8 x i8> @vpsdivrem_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdivrem_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 109
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vmulh.vx v9, v8, a1, v0.t
+; CHECK-NEXT:    li a0, -7
+; CHECK-NEXT:    vsub.vv v9, v9, v8, v0.t
+; CHECK-NEXT:    vsra.vi v9, v9, 2, v0.t
+; CHECK-NEXT:    vsrl.vi v10, v9, 7, v0.t
+; CHECK-NEXT:    vand.vi v10, v10, -1, v0.t
+; CHECK-NEXT:    vadd.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vmul.vx v10, v9, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 -7), <vscale x 8 x i1> %m, i32 %evl)
+  %w = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 -7), <vscale x 8 x i1> %m, i32 %evl)
+  %x = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %v, <vscale x 8 x i8> %w, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %x
+}
+
+define <vscale x 8 x i8> @vpudivrem_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudivrem_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 37
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vmulhu.vx v9, v8, a1, v0.t
+; CHECK-NEXT:    li a0, -128
+; CHECK-NEXT:    vsub.vv v10, v8, v9, v0.t
+; CHECK-NEXT:    vmulhu.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    li a0, 7
+; CHECK-NEXT:    vadd.vv v9, v10, v9, v0.t
+; CHECK-NEXT:    vsrl.vi v9, v9, 2, v0.t
+; CHECK-NEXT:    vmul.vx v10, v9, a0, v0.t
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i8> @llvm.vp.urem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 7), <vscale x 8 x i1> %m, i32 %evl)
+  %w = call <vscale x 8 x i8> @llvm.vp.udiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 7), <vscale x 8 x i1> %m, i32 %evl)
+  %x = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %v, <vscale x 8 x i8> %w, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %x
+}
From b6137759d0766af3b838e3390d7e9ab1204cc3aa Mon Sep 17 00:00:00 2001
From: Jesse Huang
Date: Wed, 10 Jan 2024 04:24:20 -0800
Subject: [PATCH 03/13] [RISCV] Set VP_MULH* to Expand on Zve64* and Optimize
 BuildVP*DIV

* Set VP_MULHU/VP_MULHS with i64 vector input to Expand on Zve64*
* Hoisted the
IsOperationLegalOrCustom check in BuildVPSDIV/BuildVPUDIV so they
  bail out before creating any nodes
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  14 +--
 .../CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll | 113 ++++++++++++++++++
 2 files changed, 119 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 82a2500ff386d..e2b74408885c0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6507,7 +6508,8 @@ SDValue TargetLowering::BuildVPSDIV(SDNode *N, SelectionDAG &DAG,
   unsigned EltBits = VT.getScalarSizeInBits();
 
   // Check to see if we can do this.
-  if (!isTypeLegal(VT))
+  if (!isTypeLegal(VT) ||
+      !isOperationLegalOrCustom(ISD::VP_MULHS, VT, IsAfterLegalization))
     return SDValue();
 
   SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
@@ -6577,9 +6578,7 @@ SDValue TargetLowering::BuildVPSDIV(SDNode *N, SelectionDAG &DAG,
 
   // Multiply the numerator (operand 0) by the magic value.
   auto GetMULHS = [&](SDValue X, SDValue Y) {
-    if (isOperationLegalOrCustom(ISD::VP_MULHS, VT, IsAfterLegalization))
-      return DAG.getNode(ISD::VP_MULHS, DL, VT, X, Y, Mask, VL);
-    return SDValue();
+    return DAG.getNode(ISD::VP_MULHS, DL, VT, X, Y, Mask, VL);
   };
 
   SDValue Q = GetMULHS(N0, MagicFactor);
@@ -6822,7 +6821,8 @@ SDValue TargetLowering::BuildVPUDIV(SDNode *N, SelectionDAG &DAG,
   unsigned EltBits = VT.getScalarSizeInBits();
 
   // Check to see if we can do this.
-  if (!isTypeLegal(VT))
+  if (!isTypeLegal(VT) ||
+      !isOperationLegalOrCustom(ISD::VP_MULHU, VT, IsAfterLegalization))
     return SDValue();
 
   bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
@@ -6908,9 +6908,7 @@ SDValue TargetLowering::BuildVPUDIV(SDNode *N, SelectionDAG &DAG,
   }
 
   auto GetMULHU = [&](SDValue X, SDValue Y) {
-    if (isOperationLegalOrCustom(ISD::VP_MULHU, VT, IsAfterLegalization))
-      return DAG.getNode(ISD::VP_MULHU, DL, VT, X, Y, Mask, VL);
-    return SDValue();
+    return DAG.getNode(ISD::VP_MULHU, DL, VT, X, Y, Mask, VL);
   };
 
   // Multiply the numerator (operand 0) by the magic value.
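
Aside: BuildVPSDIV/BuildVPUDIV emit the standard multiply-high "magic number" division, the same transform the non-VP BuildSDIV/BuildUDIV use. As a sanity check on the masked sequences in the tests of this series, here is a minimal scalar C++ sketch of the signed variant, specialized to 8-bit elements. sdiv_by_const, magic, factor, and shift are illustrative names, not LLVM APIs; they play the roles of the MagicFactors/Factors/Shifts vectors above, and the sketch assumes >> of a negative value is an arithmetic shift (true on all mainstream compilers, guaranteed since C++20).

#include <cassert>
#include <cstdint>

// Computes n / d for a fixed divisor d without a divide instruction:
// q = (mulhs(n, magic) + factor * n) >> shift, then add the sign bit
// of q so the result rounds toward zero, matching sdiv semantics.
int8_t sdiv_by_const(int8_t n, int8_t magic, int factor, unsigned shift) {
  int16_t prod = int16_t(n) * int16_t(magic); // full 16-bit product
  int8_t q = int8_t(prod >> 8);               // mulhs: keep the high byte
  q += int8_t(factor * n);                    // optional +/- of the numerator
  q = int8_t(q >> shift);                     // arithmetic shift right
  q += int8_t(uint8_t(q) >> 7);               // add sign bit: round to zero
  return q;
}

int main() {
  // Divisor 7 uses magic = -109, factor = +1, shift = 2, matching the
  // vmulh.vx / vadd.vv / vsra.vi / vsrl.vi / vadd.vv test sequences.
  for (int n = -128; n <= 127; ++n)
    assert(sdiv_by_const(int8_t(n), int8_t(-109), /*factor=*/1, 2) ==
           int8_t(n) / 7);
  return 0;
}

The factor term is exactly what patch 04 below specializes: when every element's factor is +1 or every one is -1, the VP_MUL plus VP_ADD pair collapses to a single VP_ADD or VP_SUB of the numerator.
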
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll
new file mode 100644
index 0000000000000..2fa4abb642270
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs | FileCheck %s
+
+declare <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.shl.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+
+define <vscale x 1 x i64> @vpudiv_by_const_no_add_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_const_no_add_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 5
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vdivu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
+  %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vpudiv_by_const_with_add_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpudiv_by_const_with_add_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 7
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vdivu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %vec = insertelement <vscale x 1 x i64> undef, i64 7, i32 0
+  %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vpsdiv_const_no_ashr_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_no_ashr_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 3
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vdiv.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %vec = insertelement <vscale x 1 x i64> undef, i64 3, i32 0
+  %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vpsdiv_const_ashr_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_ashr_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 5
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vdiv.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
+  %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vpsdiv_const_add_ashr_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_add_ashr_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 15
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vdiv.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %vec = insertelement <vscale x 1 x i64> undef, i64 15, i32 0
+  %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vpsdiv_const_sub_ashr_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -3
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vdiv.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %vec = insertelement <vscale x 1 x i64> undef, i64 -3, i32 0
+  %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vpurem_by_const_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpurem_by_const_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 5
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vremu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
+  %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vpsrem_by_const_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpsrem_by_const_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 5
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vrem.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %vec = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
+  %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
From 81032b1a3891dc5c9612ad5abb7e1fc8bb1f6118 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 13 Feb 2023 08:39:03 -0800
Subject: [PATCH 04/13] [TargetLowering] Optimize 'factor' code in BuildVPSDIV.

We can't constant fold VP_MUL yet or combine (VP_SUB 0, X) and VP_ADD.
Add some flags to keep track of when we need to emit VP_MUL/VP_ADD/VP_SUB.
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  25 ++-
 llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll | 142 +++++++-----------
 2 files changed, 72 insertions(+), 95 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e2b74408885c0..b7846212a94ab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6511,6 +6511,9 @@ SDValue TargetLowering::BuildVPSDIV(SDNode *N, SelectionDAG &DAG,
       !isOperationLegalOrCustom(ISD::VP_MULHS, VT, IsAfterLegalization))
     return SDValue();
 
+  bool AnyFactorOne = false;
+  bool AnyFactorNegOne = false;
+
   SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
 
   auto BuildSDIVPattern = [&](ConstantSDNode *C) {
@@ -6529,12 +6532,16 @@ SDValue TargetLowering::BuildVPSDIV(SDNode *N, SelectionDAG &DAG,
       magics.Magic = 0;
       magics.ShiftAmount = 0;
       ShiftMask = 0;
+      AnyFactorOne |= Divisor.isOne();
+      AnyFactorNegOne |= Divisor.isAllOnes();
     } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
       // If d > 0 and m < 0, add the numerator.
       NumeratorFactor = 1;
+      AnyFactorOne = true;
     } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
       // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
+      AnyFactorNegOne = true;
    }
 
     MagicFactors.push_back(DAG.getConstant(magics.Magic, DL, SVT));
@@ -6588,10 +6595,20 @@ SDValue TargetLowering::BuildVPSDIV(SDNode *N, SelectionDAG &DAG,
   Created.push_back(Q.getNode());
 
   // (Optionally) Add/subtract the numerator using Factor.
-  Factor = DAG.getNode(ISD::VP_MUL, DL, VT, N0, Factor, Mask, VL);
-  Created.push_back(Factor.getNode());
-  Q = DAG.getNode(ISD::VP_ADD, DL, VT, Q, Factor, Mask, VL);
-  Created.push_back(Q.getNode());
+  // FIXME: The AnyFactorOne/NegOne flags are a hack around lack of constant
+  // folding for VP_MUL/ADD.
+  if (AnyFactorOne && AnyFactorNegOne) {
+    Factor = DAG.getNode(ISD::VP_MUL, DL, VT, N0, Factor, Mask, VL);
+    Created.push_back(Factor.getNode());
+    Q = DAG.getNode(ISD::VP_ADD, DL, VT, Q, Factor, Mask, VL);
+    Created.push_back(Q.getNode());
+  } else if (AnyFactorOne) {
+    Q = DAG.getNode(ISD::VP_ADD, DL, VT, Q, N0, Mask, VL);
+    Created.push_back(Q.getNode());
+  } else if (AnyFactorNegOne) {
+    Q = DAG.getNode(ISD::VP_SUB, DL, VT, Q, N0, Mask, VL);
+    Created.push_back(Q.getNode());
+  }
 
   // Shift right algebraic by shift value.
Q = DAG.getNode(ISD::VP_SRA, DL, VT, Q, Shift, Mask, VL); diff --git a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll index f78a0ec7f2378..6e417e4dd7995 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll @@ -707,11 +707,9 @@ define @vpsdiv_pow2_nxv1i64( %va, @vpsdiv_const_no_ashr_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_const_no_ashr_nxv8i8: ; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 86 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t -; CHECK-NEXT: li a0, 86 -; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t @@ -726,12 +724,10 @@ define @vpsdiv_const_no_ashr_nxv8i8( %va, @vpsdiv_const_no_ashr_nxv4i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_const_no_ashr_nxv4i16: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 5 +; CHECK-NEXT: addiw a1, a1, 1366 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t -; CHECK-NEXT: lui a0, 5 -; CHECK-NEXT: addiw a0, a0, 1366 -; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 15, v0.t ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t @@ -746,12 +742,10 @@ define @vpsdiv_const_no_ashr_nxv4i16( %va, define @vpsdiv_const_no_ashr_nxv2i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_const_no_ashr_nxv2i32: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 349525 +; CHECK-NEXT: addiw a1, a1, 1366 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t -; CHECK-NEXT: lui a0, 349525 -; CHECK-NEXT: addiw a0, a0, 1366 -; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 31, v0.t ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t @@ -769,9 +763,7 @@ define @vpsdiv_const_no_ashr_nxv1i64( %va, ; CHECK-NEXT: lui a1, %hi(.LCPI39_0) ; CHECK-NEXT: ld a1, %lo(.LCPI39_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t ; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t ; CHECK-NEXT: li a0, 63 ; CHECK-NEXT: vsrl.vx v9, v8, a0, v0.t @@ -787,11 +779,9 @@ define @vpsdiv_const_no_ashr_nxv1i64( %va, define @vpsdiv_const_ashr_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_const_ashr_nxv8i8: ; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 103 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t -; CHECK-NEXT: li a0, 103 -; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t @@ -806,12 +796,10 @@ define @vpsdiv_const_ashr_nxv8i8( %va, @vpsdiv_const_ashr_nxv4i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_const_ashr_nxv4i16: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 6 +; CHECK-NEXT: addiw a1, a1, 1639 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t -; CHECK-NEXT: lui a0, 6 -; CHECK-NEXT: addiw a0, a0, 1639 -; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t -; 
CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 15, v0.t ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t @@ -826,12 +814,10 @@ define @vpsdiv_const_ashr_nxv4i16( %va, @vpsdiv_const_ashr_nxv2i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_const_ashr_nxv2i32: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 419430 +; CHECK-NEXT: addiw a1, a1, 1639 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t -; CHECK-NEXT: lui a0, 419430 -; CHECK-NEXT: addiw a0, a0, 1639 -; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 31, v0.t ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t @@ -849,9 +835,7 @@ define @vpsdiv_const_ashr_nxv1i64( %va, @vpsdiv_const_ashr_nxv1i64( %va, @vpsdiv_const_add_ashr_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_const_add_ashr_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: li a1, -109 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t -; CHECK-NEXT: li a0, -109 -; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 2, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t @@ -887,13 +869,11 @@ define @vpsdiv_const_add_ashr_nxv8i8( %va, @vpsdiv_const_add_ashr_nxv4i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_const_add_ashr_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: lui a1, 1048569 +; CHECK-NEXT: addiw a1, a1, -1911 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t -; CHECK-NEXT: lui a0, 1048569 -; CHECK-NEXT: addiw a0, a0, -1911 -; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 3, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 15, v0.t ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t @@ -908,13 +888,11 @@ define @vpsdiv_const_add_ashr_nxv4i16( %va, define @vpsdiv_const_add_ashr_nxv2i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_const_add_ashr_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: lui a1, 599186 +; CHECK-NEXT: addiw a1, a1, 1171 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t -; CHECK-NEXT: lui a0, 599186 -; CHECK-NEXT: addiw a0, a0, 1171 -; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 2, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 31, v0.t ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t @@ -932,10 +910,8 @@ define @vpsdiv_const_add_ashr_nxv1i64( %va, ; CHECK-NEXT: lui a1, %hi(.LCPI47_0) ; CHECK-NEXT: ld a1, %lo(.LCPI47_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vmul.vx v9, v8, a0, v0.t -; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 3, v0.t ; CHECK-NEXT: li a0, 63 ; CHECK-NEXT: vsrl.vx v9, v8, a0, v0.t @@ -951,12 +927,10 @@ define @vpsdiv_const_add_ashr_nxv1i64( %va, define @vpsdiv_const_sub_ashr_nxv8i8( %va, %m, i32 
zeroext %evl) { ; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: li a1, 109 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t -; CHECK-NEXT: li a0, 109 -; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t +; CHECK-NEXT: vsub.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 2, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 7, v0.t ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t @@ -971,13 +945,11 @@ define @vpsdiv_const_sub_ashr_nxv8i8( %va, @vpsdiv_const_sub_ashr_nxv4i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: lui a1, 7 +; CHECK-NEXT: addiw a1, a1, 1911 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t -; CHECK-NEXT: lui a0, 7 -; CHECK-NEXT: addiw a0, a0, 1911 -; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t +; CHECK-NEXT: vsub.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 3, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 15, v0.t ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t @@ -992,13 +964,11 @@ define @vpsdiv_const_sub_ashr_nxv4i16( %va, define @vpsdiv_const_sub_ashr_nxv2i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: lui a1, 449390 +; CHECK-NEXT: addiw a1, a1, -1171 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmul.vx v9, v8, a1, v0.t -; CHECK-NEXT: lui a0, 449390 -; CHECK-NEXT: addiw a0, a0, -1171 -; CHECK-NEXT: vmulh.vx v8, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t +; CHECK-NEXT: vsub.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 2, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 31, v0.t ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t @@ -1016,10 +986,8 @@ define @vpsdiv_const_sub_ashr_nxv1i64( %va, ; CHECK-NEXT: lui a1, %hi(.LCPI51_0) ; CHECK-NEXT: ld a1, %lo(.LCPI51_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: li a0, -1 -; CHECK-NEXT: vmul.vx v9, v8, a0, v0.t -; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t +; CHECK-NEXT: vsub.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: li a0, 63 ; CHECK-NEXT: vsrl.vx v9, v8, a0, v0.t @@ -1185,9 +1153,7 @@ define @vpsrem_by_const_nxv1i64( %va, @vpsrem_by_const_nxv1i64( %va, @vpsrem_by_const_nxv4i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsrem_by_const_nxv4i16: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 6 +; CHECK-NEXT: addiw a1, a1, 1639 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t -; CHECK-NEXT: lui a0, 6 -; CHECK-NEXT: addiw a0, a0, 1639 -; CHECK-NEXT: vmulh.vx v10, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t +; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t ; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t ; CHECK-NEXT: vsrl.vi v10, v9, 15, v0.t ; CHECK-NEXT: vand.vi v10, v10, -1, v0.t @@ -1229,11 +1193,9 @@ define @vpsrem_by_const_nxv4i16( %va, @vpsrem_by_const_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsrem_by_const_nxv8i8: ; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 103 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t -; CHECK-NEXT: li a0, 103 -; CHECK-NEXT: vmulh.vx v10, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t +; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t ; 
CHECK-NEXT: vsra.vi v9, v9, 1, v0.t ; CHECK-NEXT: vsrl.vi v10, v9, 7, v0.t ; CHECK-NEXT: vand.vi v10, v10, -1, v0.t @@ -1251,12 +1213,10 @@ define @vpsrem_by_const_nxv8i8( %va, @vpsrem_by_const_nxv2i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsrem_by_const_nxv2i32: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 419430 +; CHECK-NEXT: addiw a1, a1, 1639 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmul.vx v9, v8, zero, v0.t -; CHECK-NEXT: lui a0, 419430 -; CHECK-NEXT: addiw a0, a0, 1639 -; CHECK-NEXT: vmulh.vx v10, v8, a0, v0.t -; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t +; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t ; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t ; CHECK-NEXT: vsrl.vi v10, v9, 31, v0.t ; CHECK-NEXT: vand.vi v10, v10, -1, v0.t From b833489b707156896d7fc28d97c2707aeba59230 Mon Sep 17 00:00:00 2001 From: Jesse Huang Date: Thu, 6 Feb 2025 02:38:26 +0800 Subject: [PATCH 05/13] update test --- llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll | 613 +++++++++++++----- 1 file changed, 434 insertions(+), 179 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll index 6e417e4dd7995..b39fc392482cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll @@ -28,9 +28,7 @@ define @vpudiv_by_max_nxv8i8( %va, undef, i8 255, i32 0 @@ -44,9 +42,7 @@ define @vpudiv_by_max_nxv4i16( %va, undef, i16 65535, i32 0 @@ -60,9 +56,7 @@ define @vpudiv_by_max_nxv2i32( %va, undef, i32 4294967295, i32 0 @@ -76,9 +70,7 @@ define @vpudiv_by_max_nxv1i64( %va, undef, i64 18446744073709551615, i32 0 @@ -92,8 +84,7 @@ define @fold_vpudiv_vpurem_nxv8i8( %va, @fold_vpudiv_vpurem_nxv4i16( %va, @fold_vpudiv_vpurem_nxv2i32( %va, @fold_vpudiv_vpurem_nxv1i64( %va, @vpudiv_by_shl2_nxv4i16( %va, i16 %b define @vpudiv_by_shl2_nxv2i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_shl2_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: slli a0, a0, 32 -; CHECK-NEXT: srli a0, a0, 32 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t @@ -227,9 +213,8 @@ define @vpudiv_by_shl2_nxv1i64( %va, i64 %b define @vpudiv_by_vpshl2_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_vpshl2_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t ; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -245,9 +230,8 @@ define @vpudiv_by_vpshl2_nxv8i8( %va, i8 %b, define @vpudiv_by_vpshl2_nxv4i16( %va, i16 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_vpshl2_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t ; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -263,9 +247,8 @@ define @vpudiv_by_vpshl2_nxv4i16( %va, i16 define @vpudiv_by_vpshl2_nxv2i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_vpshl2_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t ; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -281,9 +264,8 @@ define @vpudiv_by_vpshl2_nxv2i32( %va, i32 define @vpudiv_by_vpshl2_nxv1i64( 
%va, i64 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_vpshl2_nxv1i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t ; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -299,11 +281,10 @@ define @vpudiv_by_vpshl2_nxv1i64( %va, i64 define @vpudiv_by_const_no_add_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_const_no_add_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 5 -; CHECK-NEXT: li a1, -51 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t +; CHECK-NEXT: vmv.v.i v9, 5 +; CHECK-NEXT: li a0, -51 +; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t ; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 @@ -317,12 +298,11 @@ define @vpudiv_by_const_no_add_nxv8i8( %va, < define @vpudiv_by_const_no_add_nxv4i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_const_no_add_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 5 -; CHECK-NEXT: lui a1, 1048573 -; CHECK-NEXT: addiw a1, a1, -819 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t +; CHECK-NEXT: vmv.v.i v9, 5 +; CHECK-NEXT: lui a0, 1048573 +; CHECK-NEXT: addi a0, a0, -819 +; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t ; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 @@ -336,12 +316,11 @@ define @vpudiv_by_const_no_add_nxv4i16( %va define @vpudiv_by_const_no_add_nxv2i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_const_no_add_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 5 -; CHECK-NEXT: lui a1, 838861 -; CHECK-NEXT: addiw a1, a1, -819 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t +; CHECK-NEXT: vmv.v.i v9, 5 +; CHECK-NEXT: lui a0, 838861 +; CHECK-NEXT: addi a0, a0, -819 +; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t ; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 @@ -355,14 +334,16 @@ define @vpudiv_by_const_no_add_nxv2i32( %va define @vpudiv_by_const_no_add_nxv1i64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_const_no_add_nxv1i64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI19_0) -; CHECK-NEXT: ld a1, %lo(.LCPI19_0)(a1) -; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 5 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t +; CHECK-NEXT: vmv.v.i v9, 5 +; CHECK-NEXT: lui a0, 838861 +; CHECK-NEXT: vmseq.vi v9, v9, 1, v0.t +; CHECK-NEXT: addiw a0, a0, -819 +; CHECK-NEXT: slli a1, a0, 32 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t -; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t +; CHECK-NEXT: vmv.v.v v0, v9 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %vec = insertelement undef, i64 5, i32 0 @@ -374,13 +355,12 @@ define @vpudiv_by_const_no_add_nxv1i64( %va define @vpudiv_by_const_with_add_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_const_with_add_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 7 -; CHECK-NEXT: li a1, 37 ; CHECK-NEXT: 
vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t -; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t +; CHECK-NEXT: vmv.v.i v9, 7 +; CHECK-NEXT: li a0, 37 +; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t ; CHECK-NEXT: li a0, -128 +; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t ; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t ; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t ; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t @@ -396,14 +376,13 @@ define @vpudiv_by_const_with_add_nxv8i8( %va, define @vpudiv_by_const_with_add_nxv4i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_const_with_add_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 7 -; CHECK-NEXT: lui a1, 2 -; CHECK-NEXT: addiw a1, a1, 1171 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t -; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t +; CHECK-NEXT: vmv.v.i v9, 7 +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: addi a0, a0, 1171 +; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t ; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t ; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t ; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t ; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t @@ -419,14 +398,13 @@ define @vpudiv_by_const_with_add_nxv4i16( % define @vpudiv_by_const_with_add_nxv2i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_const_with_add_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 7 -; CHECK-NEXT: lui a1, 149797 -; CHECK-NEXT: addiw a1, a1, -1755 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t -; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t +; CHECK-NEXT: vmv.v.i v9, 7 +; CHECK-NEXT: lui a0, 149797 +; CHECK-NEXT: addi a0, a0, -1755 +; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t ; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t ; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t ; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t ; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t @@ -444,13 +422,12 @@ define @vpudiv_by_const_with_add_nxv1i64( % ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: ld a1, %lo(.LCPI23_0)(a1) -; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 7 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 7 ; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t -; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: slli a0, a0, 63 +; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t ; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t ; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t ; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t @@ -517,9 +494,7 @@ define @vpsdiv_by_min_nxv8i8( %va, undef, i8 -128, i32 0 @@ -535,9 +510,7 @@ define @vpsdiv_by_min_nxv1i64( %va, undef, i64 -9223372036854775808, i32 0 @@ -552,9 +525,7 @@ define @vpsdiv_by_min_nxv4i16( %va, undef, i16 -32768, i32 0 @@ -569,9 +540,7 @@ define @vpsdiv_by_min_nxv2i32( %va, undef, i32 -2147483648, i32 0 @@ -583,10 +552,9 @@ define @vpsdiv_by_min_nxv2i32( %va, @vpsdiv_pow2_nxv4i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_pow2_nxv4i16: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 4 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t ; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t ; CHECK-NEXT: vmor.mm v10, v10, v11 @@ -594,15 +562,12 @@ define @vpsdiv_pow2_nxv4i16( %va, undef, i16 4, i32 0 @@ 
-614,10 +579,9 @@ define @vpsdiv_pow2_nxv4i16( %va, @vpsdiv_pow2_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_pow2_nxv8i8: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 4 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t ; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t ; CHECK-NEXT: vmor.mm v10, v10, v11 @@ -625,15 +589,12 @@ define @vpsdiv_pow2_nxv8i8( %va, undef, i8 4, i32 0 @@ -645,10 +606,9 @@ define @vpsdiv_pow2_nxv8i8( %va, @vpsdiv_pow2_nxv2i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_pow2_nxv2i32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 4 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t ; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t ; CHECK-NEXT: vmor.mm v10, v10, v11 @@ -656,15 +616,12 @@ define @vpsdiv_pow2_nxv2i32( %va, undef, i32 4, i32 0 @@ -676,26 +633,24 @@ define @vpsdiv_pow2_nxv2i32( %va, @vpsdiv_pow2_nxv1i64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_pow2_nxv1i64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 4 -; CHECK-NEXT: li a1, 63 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vsra.vx v11, v8, a1, v0.t -; CHECK-NEXT: li a1, 62 -; CHECK-NEXT: vsrl.vx v11, v11, a1, v0.t +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vsra.vx v12, v8, a0, v0.t +; CHECK-NEXT: li a0, 62 +; CHECK-NEXT: vmor.mm v10, v10, v11 +; CHECK-NEXT: vsrl.vx v11, v12, a0, v0.t ; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t ; CHECK-NEXT: vsra.vi v11, v11, 2, v0.t -; CHECK-NEXT: vmseq.vi v12, v10, -1, v0.t -; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmsgt.vi v0, v11, 4, v0.t +; CHECK-NEXT: vmsgt.vi v0, v12, 4, v0.t ; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: ret %vec = insertelement undef, i64 4, i32 0 @@ -725,7 +680,7 @@ define @vpsdiv_const_no_ashr_nxv4i16( %va, ; CHECK-LABEL: vpsdiv_const_no_ashr_nxv4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, 5 -; CHECK-NEXT: addiw a1, a1, 1366 +; CHECK-NEXT: addi a1, a1, 1366 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t @@ -743,7 +698,7 @@ define @vpsdiv_const_no_ashr_nxv2i32( %va, ; CHECK-LABEL: vpsdiv_const_no_ashr_nxv2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, 349525 -; CHECK-NEXT: addiw a1, a1, 1366 +; CHECK-NEXT: addi a1, a1, 1366 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmulh.vx v8, v8, a1, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 0, v0.t @@ -797,7 +752,7 @@ define @vpsdiv_const_ashr_nxv4i16( %va, @vpsdiv_const_ashr_nxv2i32( %va, @vpsdiv_const_add_ashr_nxv4i16( %va, ; CHECK-LABEL: vpsdiv_const_add_ashr_nxv4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, 1048569 -; CHECK-NEXT: addiw a1, a1, -1911 +; CHECK-NEXT: addi a1, a1, -1911 ; CHECK-NEXT: vsetvli 
zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t @@ -889,7 +844,7 @@ define @vpsdiv_const_add_ashr_nxv2i32( %va, ; CHECK-LABEL: vpsdiv_const_add_ashr_nxv2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, 599186 -; CHECK-NEXT: addiw a1, a1, 1171 +; CHECK-NEXT: addi a1, a1, 1171 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t @@ -907,8 +862,10 @@ define @vpsdiv_const_add_ashr_nxv2i32( %va, define @vpsdiv_const_add_ashr_nxv1i64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_const_add_ashr_nxv1i64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI47_0) -; CHECK-NEXT: ld a1, %lo(.LCPI47_0)(a1) +; CHECK-NEXT: lui a1, 559241 +; CHECK-NEXT: addiw a1, a1, -1911 +; CHECK-NEXT: slli a2, a1, 32 +; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t ; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t @@ -946,7 +903,7 @@ define @vpsdiv_const_sub_ashr_nxv4i16( %va, ; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, 7 -; CHECK-NEXT: addiw a1, a1, 1911 +; CHECK-NEXT: addi a1, a1, 1911 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t ; CHECK-NEXT: vsub.vv v8, v9, v8, v0.t @@ -965,7 +922,7 @@ define @vpsdiv_const_sub_ashr_nxv2i32( %va, ; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, 449390 -; CHECK-NEXT: addiw a1, a1, -1171 +; CHECK-NEXT: addi a1, a1, -1171 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t ; CHECK-NEXT: vsub.vv v8, v9, v8, v0.t @@ -983,8 +940,10 @@ define @vpsdiv_const_sub_ashr_nxv2i32( %va, define @vpsdiv_const_sub_ashr_nxv1i64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_const_sub_ashr_nxv1i64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI51_0) -; CHECK-NEXT: ld a1, %lo(.LCPI51_0)(a1) +; CHECK-NEXT: lui a1, 349525 +; CHECK-NEXT: addiw a1, a1, 1365 +; CHECK-NEXT: slli a2, a1, 32 +; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmulh.vx v9, v8, a1, v0.t ; CHECK-NEXT: vsub.vv v8, v9, v8, v0.t @@ -1055,15 +1014,17 @@ define @vpurem_by_max_nxv2i32( %va, @vpurem_by_const_nxv1i64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpurem_by_const_nxv1i64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: lui a1, %hi(.LCPI56_0) -; CHECK-NEXT: ld a1, %lo(.LCPI56_0)(a1) -; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 5 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmulhu.vx v11, v8, a1, v0.t +; CHECK-NEXT: lui a0, 838861 +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: addiw a0, a0, -819 +; CHECK-NEXT: slli a1, a0, 32 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: vmulhu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t -; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0 ; CHECK-NEXT: li a0, 5 ; CHECK-NEXT: vmv1r.v v0, v9 @@ -1079,17 +1040,16 @@ define @vpurem_by_const_nxv1i64( %va, @vpurem_by_const_nxv4i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpurem_by_const_nxv4i16: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 5 -; CHECK-NEXT: lui a1, 1048573 -; CHECK-NEXT: addiw a1, a1, -819 -; CHECK-NEXT: vsetvli zero, 
a0, e16, m1, ta, ma -; CHECK-NEXT: vmulhu.vx v11, v8, a1, v0.t +; CHECK-NEXT: lui a0, 1048573 +; CHECK-NEXT: addi a0, a0, -819 +; CHECK-NEXT: vmulhu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t ; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t -; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0 ; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmul.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t @@ -1103,16 +1063,15 @@ define @vpurem_by_const_nxv4i16( %va, @vpurem_by_const_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpurem_by_const_nxv8i8: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 5 -; CHECK-NEXT: li a1, -51 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmulhu.vx v11, v8, a1, v0.t +; CHECK-NEXT: li a0, -51 +; CHECK-NEXT: vmulhu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t ; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t -; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0 ; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmul.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t @@ -1126,17 +1085,16 @@ define @vpurem_by_const_nxv8i8( %va, @vpurem_by_const_nxv2i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpurem_by_const_nxv2i32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 5 -; CHECK-NEXT: lui a1, 838861 -; CHECK-NEXT: addiw a1, a1, -819 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmulhu.vx v11, v8, a1, v0.t +; CHECK-NEXT: lui a0, 838861 +; CHECK-NEXT: addi a0, a0, -819 +; CHECK-NEXT: vmulhu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t ; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t -; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0 ; CHECK-NEXT: li a0, 5 +; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmul.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t @@ -1154,8 +1112,8 @@ define @vpsrem_by_const_nxv1i64( %va, @vpsrem_by_const_nxv4i16( %va, @vpsrem_by_const_nxv2i32( %va, @vpsrem_by_const_nxv2i32( %va, @vpudiv_by_1_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_1_nxv8i8: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 1 +; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vmor.mm v10, v10, v11 +; CHECK-NEXT: vsra.vi v11, v8, 7, v0.t +; CHECK-NEXT: vsrl.vi v11, v11, 8, v0.t +; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t +; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t +; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: ret %vec = insertelement undef, i8 1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1244,6 +1219,23 @@ define @vpudiv_by_1_nxv8i8( %va, @vpudiv_by_1_nxv4i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_1_nxv4i16: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 1 +; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; 
CHECK-NEXT: vmor.mm v10, v10, v11 +; CHECK-NEXT: vsra.vi v11, v8, 15, v0.t +; CHECK-NEXT: vsrl.vi v11, v11, 16, v0.t +; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t +; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t +; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: ret %vec = insertelement undef, i16 1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1254,6 +1246,24 @@ define @vpudiv_by_1_nxv4i16( %va, @vpudiv_by_1_nxv2i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_1_nxv2i32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 1 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vmor.mm v10, v10, v11 +; CHECK-NEXT: vsra.vi v11, v8, 31, v0.t +; CHECK-NEXT: vsrl.vx v11, v11, a0, v0.t +; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t +; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t +; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: ret %vec = insertelement undef, i32 1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1264,6 +1274,25 @@ define @vpudiv_by_1_nxv2i32( %va, @vpudiv_by_1_nxv1i64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_1_nxv1i64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 1 +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vsra.vx v12, v8, a0, v0.t +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vmor.mm v10, v10, v11 +; CHECK-NEXT: vsrl.vx v11, v12, a0, v0.t +; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t +; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t +; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: ret %vec = insertelement undef, i64 1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1274,6 +1303,23 @@ define @vpudiv_by_1_nxv1i64( %va, @vpsdiv_by_1_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_by_1_nxv8i8: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 1 +; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vmor.mm v10, v10, v11 +; CHECK-NEXT: vsra.vi v11, v8, 7, v0.t +; CHECK-NEXT: vsrl.vi v11, v11, 8, v0.t +; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t +; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t +; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: ret %vec = insertelement undef, i8 1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1284,6 +1330,23 @@ define @vpsdiv_by_1_nxv8i8( %va, @vpsdiv_by_1_nxv4i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: 
vpsdiv_by_1_nxv4i16: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 1 +; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vmor.mm v10, v10, v11 +; CHECK-NEXT: vsra.vi v11, v8, 15, v0.t +; CHECK-NEXT: vsrl.vi v11, v11, 16, v0.t +; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t +; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t +; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: ret %vec = insertelement undef, i16 1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1294,6 +1357,24 @@ define @vpsdiv_by_1_nxv4i16( %va, @vpsdiv_by_1_nxv2i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_by_1_nxv2i32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 1 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vmor.mm v10, v10, v11 +; CHECK-NEXT: vsra.vi v11, v8, 31, v0.t +; CHECK-NEXT: vsrl.vx v11, v11, a0, v0.t +; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t +; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t +; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: ret %vec = insertelement undef, i32 1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1304,6 +1385,25 @@ define @vpsdiv_by_1_nxv2i32( %va, @vpsdiv_by_1_nxv1i64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsdiv_by_1_nxv1i64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 1 +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vsra.vx v12, v8, a0, v0.t +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vmor.mm v10, v10, v11 +; CHECK-NEXT: vsrl.vx v11, v12, a0, v0.t +; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t +; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t +; CHECK-NEXT: vmsgt.vi v0, v12, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: ret %vec = insertelement undef, i64 1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1314,8 +1414,9 @@ define @vpsdiv_by_1_nxv1i64( %va, @vpurem_by_1_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpurem_by_1_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %vec = insertelement undef, i8 1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1326,8 +1427,9 @@ define @vpurem_by_1_nxv8i8( %va, @vpurem_by_1_nxv4i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpurem_by_1_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 0, v0.t +; CHECK-NEXT: 
vsub.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %vec = insertelement undef, i16 1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1338,8 +1440,9 @@ define @vpurem_by_1_nxv4i16( %va, @vpurem_by_1_nxv2i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpurem_by_1_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %vec = insertelement undef, i32 1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1350,8 +1453,9 @@ define @vpurem_by_1_nxv2i32( %va, @vpurem_by_1_nxv1i64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpurem_by_1_nxv1i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %vec = insertelement undef, i64 1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1362,8 +1466,25 @@ define @vpurem_by_1_nxv1i64( %va, @vpsrem_by_1_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsrem_by_1_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 1 +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmseq.vi v12, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vmor.mm v10, v10, v12 +; CHECK-NEXT: vsra.vi v12, v8, 7, v0.t +; CHECK-NEXT: vsrl.vi v12, v12, 8, v0.t +; CHECK-NEXT: vadd.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsra.vi v12, v12, 0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v12, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v12, v10, 0, v0.t +; CHECK-NEXT: vmsgt.vi v0, v11, 1, v0.t +; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %vec = insertelement undef, i8 1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1374,8 +1495,25 @@ define @vpsrem_by_1_nxv8i8( %va, @vpsrem_by_1_nxv4i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsrem_by_1_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 1 +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmseq.vi v12, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vmor.mm v10, v10, v12 +; CHECK-NEXT: vsra.vi v12, v8, 15, v0.t +; CHECK-NEXT: vsrl.vi v12, v12, 16, v0.t +; CHECK-NEXT: vadd.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsra.vi v12, v12, 0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v12, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v12, v10, 0, v0.t +; CHECK-NEXT: vmsgt.vi v0, v11, 1, v0.t +; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %vec = insertelement undef, i16 1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1386,8 +1524,26 @@ define @vpsrem_by_1_nxv4i16( %va, @vpsrem_by_1_nxv2i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsrem_by_1_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: 
vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 1 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmseq.vi v12, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vmor.mm v10, v10, v12 +; CHECK-NEXT: vsra.vi v12, v8, 31, v0.t +; CHECK-NEXT: vsrl.vx v12, v12, a0, v0.t +; CHECK-NEXT: vadd.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsra.vi v12, v12, 0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v12, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v12, v10, 0, v0.t +; CHECK-NEXT: vmsgt.vi v0, v11, 1, v0.t +; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %vec = insertelement undef, i32 1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1398,8 +1554,27 @@ define @vpsrem_by_1_nxv2i32( %va, @vpsrem_by_1_nxv1i64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsrem_by_1_nxv1i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 1 +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vsra.vx v12, v8, a0, v0.t +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vmv.v.i v13, 0 +; CHECK-NEXT: vmor.mm v10, v10, v11 +; CHECK-NEXT: vsrl.vx v11, v12, a0, v0.t +; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t +; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v11, v10, 0, v0.t +; CHECK-NEXT: vmsgt.vi v0, v13, 1, v0.t +; CHECK-NEXT: vmerge.vvm v10, v10, v11, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %vec = insertelement undef, i64 1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1410,8 +1585,26 @@ define @vpsrem_by_1_nxv1i64( %va, @vpsrem_by_neg1_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsrem_by_neg1_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, -1 +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmseq.vi v12, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vmor.mm v10, v10, v12 +; CHECK-NEXT: vsra.vi v12, v8, 7, v0.t +; CHECK-NEXT: vsrl.vi v12, v12, 8, v0.t +; CHECK-NEXT: vadd.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsra.vi v12, v12, 0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v12, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v12, v10, 0, v0.t +; CHECK-NEXT: vmsgt.vi v0, v11, -1, v0.t +; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v10, v10, 0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %vec = insertelement undef, i8 -1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1422,8 +1615,26 @@ define @vpsrem_by_neg1_nxv8i8( %va, @vpsrem_by_neg1_nxv4i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsrem_by_neg1_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, -1 +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmseq.vi v12, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vmor.mm 
v10, v10, v12 +; CHECK-NEXT: vsra.vi v12, v8, 15, v0.t +; CHECK-NEXT: vsrl.vi v12, v12, 16, v0.t +; CHECK-NEXT: vadd.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsra.vi v12, v12, 0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v12, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v12, v10, 0, v0.t +; CHECK-NEXT: vmsgt.vi v0, v11, -1, v0.t +; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v10, v10, 0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %vec = insertelement undef, i16 -1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1434,8 +1645,27 @@ define @vpsrem_by_neg1_nxv4i16( %va, @vpsrem_by_neg1_nxv2i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsrem_by_neg1_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, -1 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmseq.vi v12, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vmor.mm v10, v10, v12 +; CHECK-NEXT: vsra.vi v12, v8, 31, v0.t +; CHECK-NEXT: vsrl.vx v12, v12, a0, v0.t +; CHECK-NEXT: vadd.vv v12, v8, v12, v0.t +; CHECK-NEXT: vsra.vi v12, v12, 0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v12, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v12, v10, 0, v0.t +; CHECK-NEXT: vmsgt.vi v0, v11, -1, v0.t +; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v10, v10, 0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %vec = insertelement undef, i32 -1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1446,8 +1676,28 @@ define @vpsrem_by_neg1_nxv2i32( %va, @vpsrem_by_neg1_nxv1i64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpsrem_by_neg1_nxv1i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, -1 +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vmseq.vi v11, v10, -1, v0.t +; CHECK-NEXT: vmseq.vi v10, v10, 1, v0.t +; CHECK-NEXT: vsra.vx v12, v8, a0, v0.t +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vmv.v.i v13, 0 +; CHECK-NEXT: vmor.mm v10, v10, v11 +; CHECK-NEXT: vsrl.vx v11, v12, a0, v0.t +; CHECK-NEXT: vadd.vv v11, v8, v11, v0.t +; CHECK-NEXT: vsra.vi v11, v11, 0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v11, v10, 0, v0.t +; CHECK-NEXT: vmsgt.vi v0, v13, -1, v0.t +; CHECK-NEXT: vmerge.vvm v10, v10, v11, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vrsub.vi v10, v10, 0, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %vec = insertelement undef, i64 -1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer @@ -1480,18 +1730,23 @@ define @vpsdivrem_nxv8i8( %va, @vpudivrem_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudivrem_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 37 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmulhu.vx v9, v8, a1, v0.t +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 7 +; CHECK-NEXT: li a0, 37 +; CHECK-NEXT: vmulhu.vx v11, v8, a0, v0.t ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t -; CHECK-NEXT: vmulhu.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsub.vv v12, v8, v11, v0.t +; CHECK-NEXT: 
vmulhu.vx v12, v12, a0, v0.t +; CHECK-NEXT: vadd.vv v11, v12, v11, v0.t +; CHECK-NEXT: vsrl.vi v11, v11, 2, v0.t +; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t ; CHECK-NEXT: li a0, 7 -; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t -; CHECK-NEXT: vsrl.vi v9, v9, 2, v0.t -; CHECK-NEXT: vmul.vx v10, v9, a0, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: vmerge.vvm v10, v11, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmul.vx v11, v10, a0, v0.t +; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsub.vv v8, v8, v11, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.urem.nxv8i8( %va, splat (i8 7), %m, i32 %evl) %w = call @llvm.vp.udiv.nxv8i8( %va, splat (i8 7), %m, i32 %evl) From 53b16ef51c0379812deeeead518ece3008143e29 Mon Sep 17 00:00:00 2001 From: Jesse Huang Date: Wed, 19 Feb 2025 03:22:15 -0800 Subject: [PATCH 06/13] address comments --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 74ab35f8c5f05..5cd17a203dbf2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -27375,7 +27375,7 @@ SDValue DAGCombiner::visitVPUDIVLike(SDValue N0, SDValue N1, SDNode *N) { if (N1.getOpcode() == ISD::VP_SHL && N1->getOperand(2) == Mask && N1->getOperand(3) == VL) { SDValue N10 = N1.getOperand(0); - if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) && + if (isConstantOrConstantVector(N10, /*NoOpaques=*/ true) && DAG.isKnownToBeAPowerOfTwo(N10)) { SDValue LogBase2 = BuildLogBase2(N10, DL); AddToWorklist(LogBase2.getNode()); @@ -27416,9 +27416,10 @@ SDValue DAGCombiner::visitVPUDIVLike(SDValue N0, SDValue N1, SDNode *N) { // fold (udiv x, c) -> alternate AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); if (isConstantOrConstantVector(N1) && - !TLI.isIntDivCheap(N->getValueType(0), Attr)) + !TLI.isIntDivCheap(N->getValueType(0), Attr)) { if (SDValue Op = BuildVPUDIV(N)) return Op; + } return SDValue(); } From 127432cb1683513fa8cb6495520ad10d233e7e72 Mon Sep 17 00:00:00 2001 From: Jesse Huang Date: Wed, 19 Feb 2025 03:25:37 -0800 Subject: [PATCH 07/13] Merge VP_MULHU/HS expand and custom part --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 72b6ba0c2d8ce..dc8603a5376cf 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1283,8 +1283,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom); // vXi64 MULHS/MULHU requires the V extension instead of Zve64*. - if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) + if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) { setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom); + } else { + setOperationAction({ISD::VP_MULHU, ISD::VP_MULHS}, VT, Expand); + } setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS, ISD::AVGCEILU, ISD::SADDSAT, ISD::UADDSAT, @@ -1305,11 +1308,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(IntegerVPOps, VT, Custom); - // Zve64* does not support VP_MULHU/S with nxvXi64. 
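// Illustrative sketch (not code from this patch): ISD::VP_MULHU/ISD::VP_MULHS
// must be either custom-lowered or expanded here because the VP
// divide-by-constant combine (BuildVPUDIV in DAGCombiner, exercised by the
// tests above) emits a high-multiply node pair of roughly this shape. `X`,
// `Mask`, and `VL` stand for the VP operands, and `MagicVal`/`PostShift` are
// placeholders for the multiplier and shift that LLVM's
// UnsignedDivisionByConstantInfo would compute (e.g. 0xCCCCCCCD and 2 for a
// 32-bit unsigned divide by 5, matching the vmulhu.vx/vsrl.vi pairs in the
// CHECK lines):
//
//   SDValue Magic = DAG.getConstant(MagicVal, DL, VT);
//   SDValue Hi = DAG.getNode(ISD::VP_MULHU, DL, VT, X, Magic, Mask, VL);
//   SDValue Quot = DAG.getNode(ISD::VP_SRL, DL, VT, Hi,
//                              DAG.getConstant(PostShift, DL, VT), Mask, VL);
//
// Zve64* has no vmulh[u] for i64 elements, so for those types the VP nodes
// take the Expand path being folded below into the merged condition above.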
- if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) { - setOperationAction({ISD::VP_MULHU, ISD::VP_MULHS}, VT, Expand); - } - if (Subtarget.hasStdExtZvkb()) setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom); From 2fd98d76fe3aed548edcd4669641c4a3419594a1 Mon Sep 17 00:00:00 2001 From: Jesse Huang Date: Wed, 19 Feb 2025 10:50:20 -0800 Subject: [PATCH 08/13] Use splat constant and change undef to poison --- .../CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll | 30 +- llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll | 432 ++++++------- 2 files changed, 160 insertions(+), 302 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll index 2fa4abb642270..d9bb93248c44e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll @@ -15,9 +15,7 @@ define @vpudiv_by_const_no_add_nxv1i64( %va ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vdivu.vx v8, v8, a1, v0.t ; CHECK-NEXT: ret - %vec = insertelement undef, i64 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv1i64( %va, splat (i64 5), %m, i32 %evl) ret %v } @@ -28,9 +26,7 @@ define @vpudiv_by_const_with_add_nxv1i64( % ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vdivu.vx v8, v8, a1, v0.t ; CHECK-NEXT: ret - %vec = insertelement undef, i64 7, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv1i64( %va, splat (i64 7), %m, i32 %evl) ret %v } @@ -41,9 +37,7 @@ define @vpsdiv_const_no_ashr_nxv1i64( %va, ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t ; CHECK-NEXT: ret - %vec = insertelement undef, i64 3, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv1i64( %va, splat (i64 3), %m, i32 %evl) ret %v } @@ -54,9 +48,7 @@ define @vpsdiv_const_ashr_nxv1i64( %va, undef, i64 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv1i64( %va, splat (i64 5), %m, i32 %evl) ret %v } @@ -67,9 +59,7 @@ define @vpsdiv_const_add_ashr_nxv1i64( %va, ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t ; CHECK-NEXT: ret - %vec = insertelement undef, i64 15, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv1i64( %va, splat (i64 15), %m, i32 %evl) ret %v } @@ -80,7 +70,7 @@ define @vpsdiv_const_sub_ashr_nxv1i64( %va, ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t ; CHECK-NEXT: ret - %vec = insertelement undef, i64 -3, i32 0 + %vec = insertelement poison, i64 -3, i32 0 %splat = shufflevector %vec, poison, zeroinitializer %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) ret %v @@ -93,9 +83,7 @@ define @vpurem_by_const_nxv1i64( %va, undef, i64 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv1i64( %va, splat (i64 5), %m, i32 %evl) ret %v } @@ -106,8 +94,6 @@ define @vpsrem_by_const_nxv1i64( %va, undef, i64 5, i32 0 - %splat = shufflevector %vec,
poison, zeroinitializer - %v = call @llvm.vp.srem.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv1i64( %va, splat (i64 5), %m, i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll index b39fc392482cf..5ef604132a64c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll @@ -31,9 +31,7 @@ define @vpudiv_by_max_nxv8i8( %va, undef, i8 255, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv8i8( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv8i8( %va, splat (i8 255), %m, i32 %evl) ret %v } @@ -45,9 +43,7 @@ define @vpudiv_by_max_nxv4i16( %va, undef, i16 65535, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv4i16( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv4i16( %va, splat (i16 65535), %m, i32 %evl) ret %v } @@ -59,9 +55,7 @@ define @vpudiv_by_max_nxv2i32( %va, undef, i32 4294967295, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv2i32( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv2i32( %va, splat (i32 4294967295), %m, i32 %evl) ret %v } @@ -73,9 +67,7 @@ define @vpudiv_by_max_nxv1i64( %va, undef, i64 18446744073709551615, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv1i64( %va, splat (i64 18446744073709551615), %m, i32 %evl) ret %v } @@ -89,10 +81,8 @@ define @fold_vpudiv_vpurem_nxv8i8( %va, undef, i8 128, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv8i8( %va, %splat, %m, i32 %evl) - %u = call @llvm.vp.urem.nxv8i8( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv8i8( %va, splat (i8 128), %m, i32 %evl) + %u = call @llvm.vp.urem.nxv8i8( %va, splat (i8 128), %m, i32 %evl) %x = add %v, %u ret %x } @@ -107,10 +97,8 @@ define @fold_vpudiv_vpurem_nxv4i16( %va, undef, i16 16384, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv4i16( %va, %splat, %m, i32 %evl) - %u = call @llvm.vp.urem.nxv4i16( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv4i16( %va, splat (i16 16384), %m, i32 %evl) + %u = call @llvm.vp.urem.nxv4i16( %va, splat (i16 16384), %m, i32 %evl) %x = add %v, %u ret %x } @@ -125,10 +113,8 @@ define @fold_vpudiv_vpurem_nxv2i32( %va, undef, i32 16384, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv2i32( %va, %splat, %m, i32 %evl) - %u = call @llvm.vp.urem.nxv2i32( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv2i32( %va, splat (i32 16384), %m, i32 %evl) + %u = call @llvm.vp.urem.nxv2i32( %va, splat (i32 16384), %m, i32 %evl) %x = add %v, %u ret %x } @@ -143,10 +129,8 @@ define @fold_vpudiv_vpurem_nxv1i64( %va, undef, i64 16384, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv1i64( %va, %splat, %m, i32 %evl) - %u = call @llvm.vp.urem.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv1i64( %va, splat (i64 16384), %m, i32 %evl) + %u = call @llvm.vp.urem.nxv1i64( %va, splat (i64 16384), %m, i32 %evl) %x = add %v, %u ret %x } @@ -160,7 +144,7 @@ define @vpudiv_by_shl2_nxv8i8( %va, i8 %b, undef, i8 %sh, i32 0 + %vec = insertelement poison, i8 %sh, i32 0 %splat = shufflevector %vec, poison, zeroinitializer %v = call @llvm.vp.udiv.nxv8i8( %va, %splat, %m, i32 %evl) ret %v @@ -176,7 +160,7 @@ 
define @vpudiv_by_shl2_nxv4i16( %va, i16 %b ; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %sh = shl i16 2, %b - %vec = insertelement undef, i16 %sh, i32 0 + %vec = insertelement poison, i16 %sh, i32 0 %splat = shufflevector %vec, poison, zeroinitializer %v = call @llvm.vp.udiv.nxv4i16( %va, %splat, %m, i32 %evl) ret %v @@ -190,7 +174,7 @@ define @vpudiv_by_shl2_nxv2i32( %va, i32 %b ; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %sh = shl i32 2, %b - %vec = insertelement undef, i32 %sh, i32 0 + %vec = insertelement poison, i32 %sh, i32 0 %splat = shufflevector %vec, poison, zeroinitializer %v = call @llvm.vp.udiv.nxv2i32( %va, %splat, %m, i32 %evl) ret %v @@ -204,7 +188,7 @@ define @vpudiv_by_shl2_nxv1i64( %va, i64 %b ; CHECK-NEXT: vsrl.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %sh = shl i64 2, %b - %vec = insertelement undef, i64 %sh, i32 0 + %vec = insertelement poison, i64 %sh, i32 0 %splat = shufflevector %vec, poison, zeroinitializer %v = call @llvm.vp.udiv.nxv1i64( %va, %splat, %m, i32 %evl) ret %v @@ -218,11 +202,9 @@ define @vpudiv_by_vpshl2_nxv8i8( %va, i8 %b, ; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t ; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret - %vec1 = insertelement undef, i8 4, i32 0 - %splat1 = shufflevector %vec1, poison, zeroinitializer - %vec2 = insertelement undef, i8 %b, i32 0 + %vec2 = insertelement poison, i8 %b, i32 0 %splat2 = shufflevector %vec2, poison, zeroinitializer - %sh = call @llvm.vp.shl.nxv8i8( %splat1, %splat2, %m, i32 %evl) + %sh = call @llvm.vp.shl.nxv8i8( splat (i8 4), %splat2, %m, i32 %evl) %v = call @llvm.vp.udiv.nxv8i8( %va, %sh, %m, i32 %evl) ret %v } @@ -235,11 +217,9 @@ define @vpudiv_by_vpshl2_nxv4i16( %va, i16 ; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t ; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret - %vec1 = insertelement undef, i16 4, i32 0 - %splat1 = shufflevector %vec1, poison, zeroinitializer - %vec2 = insertelement undef, i16 %b, i32 0 + %vec2 = insertelement poison, i16 %b, i32 0 %splat2 = shufflevector %vec2, poison, zeroinitializer - %sh = call @llvm.vp.shl.nxv4i16( %splat1, %splat2, %m, i32 %evl) + %sh = call @llvm.vp.shl.nxv4i16( splat (i16 4), %splat2, %m, i32 %evl) %v = call @llvm.vp.udiv.nxv4i16( %va, %sh, %m, i32 %evl) ret %v } @@ -252,11 +232,9 @@ define @vpudiv_by_vpshl2_nxv2i32( %va, i32 ; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t ; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret - %vec1 = insertelement undef, i32 4, i32 0 - %splat1 = shufflevector %vec1, poison, zeroinitializer - %vec2 = insertelement undef, i32 %b, i32 0 + %vec2 = insertelement poison, i32 %b, i32 0 %splat2 = shufflevector %vec2, poison, zeroinitializer - %sh = call @llvm.vp.shl.nxv2i32( %splat1, %splat2, %m, i32 %evl) + %sh = call @llvm.vp.shl.nxv2i32( splat (i32 4), %splat2, %m, i32 %evl) %v = call @llvm.vp.udiv.nxv2i32( %va, %sh, %m, i32 %evl) ret %v } @@ -269,11 +247,9 @@ define @vpudiv_by_vpshl2_nxv1i64( %va, i64 ; CHECK-NEXT: vadd.vi v9, v9, 2, v0.t ; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret - %vec1 = insertelement undef, i64 4, i32 0 - %splat1 = shufflevector %vec1, poison, zeroinitializer - %vec2 = insertelement undef, i64 %b, i32 0 + %vec2 = insertelement poison, i64 %b, i32 0 %splat2 = shufflevector %vec2, poison, zeroinitializer - %sh = call @llvm.vp.shl.nxv1i64( %splat1, %splat2, %m, i32 %evl) + %sh = call @llvm.vp.shl.nxv1i64( splat (i64 4), %splat2, %m, i32 %evl) %v = call @llvm.vp.udiv.nxv1i64( %va, %sh, %m, i32 %evl) ret %v } @@ -281,53 +257,47 @@ define @vpudiv_by_vpshl2_nxv1i64( %va, i64 define 
@vpudiv_by_const_no_add_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_const_no_add_nxv8i8: ; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -51 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 5 -; CHECK-NEXT: li a0, -51 -; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t -; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vmulhu.vx v9, v8, a1, v0.t +; CHECK-NEXT: vsrl.vi v9, v9, 2, v0.t +; CHECK-NEXT: vmv.v.i v10, 5 +; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret - %vec = insertelement undef, i8 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv8i8( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv8i8( %va, splat (i8 5), %m, i32 %evl) ret %v } define @vpudiv_by_const_no_add_nxv4i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_const_no_add_nxv4i16: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 1048573 +; CHECK-NEXT: addi a1, a1, -819 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 5 -; CHECK-NEXT: lui a0, 1048573 -; CHECK-NEXT: addi a0, a0, -819 -; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t -; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vmulhu.vx v9, v8, a1, v0.t +; CHECK-NEXT: vsrl.vi v9, v9, 2, v0.t +; CHECK-NEXT: vmv.v.i v10, 5 +; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret - %vec = insertelement undef, i16 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv4i16( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv4i16( %va, splat (i16 5), %m, i32 %evl) ret %v } define @vpudiv_by_const_no_add_nxv2i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_const_no_add_nxv2i32: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 838861 +; CHECK-NEXT: addi a1, a1, -819 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 5 -; CHECK-NEXT: lui a0, 838861 -; CHECK-NEXT: addi a0, a0, -819 -; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t -; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t -; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vmulhu.vx v9, v8, a1, v0.t +; CHECK-NEXT: vsrl.vi v9, v9, 2, v0.t +; CHECK-NEXT: vmv.v.i v10, 5 +; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret - %vec = insertelement undef, i32 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv2i32( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv2i32( %va, splat (i32 5), %m, i32 %evl) ret %v } @@ -346,74 +316,66 @@ define @vpudiv_by_const_no_add_nxv1i64( %va ; CHECK-NEXT: vmv.v.v v0, v9 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret - %vec = insertelement undef, i64 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv1i64( %va, splat (i64 5), %m, i32 %evl) ret %v } define @vpudiv_by_const_with_add_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_const_with_add_nxv8i8: ; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 37 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 7 -; CHECK-NEXT: li a0, 37 -; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmulhu.vx v9, v8, a1, v0.t ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: 
vsub.vv v11, v8, v10, v0.t -; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t -; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t -; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t -; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmulhu.vx v10, v10, a0, v0.t +; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t +; CHECK-NEXT: vmv.v.i v10, 7 +; CHECK-NEXT: vsrl.vi v9, v9, 2, v0.t +; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret - %vec = insertelement undef, i8 7, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv8i8( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv8i8( %va, splat (i8 7), %m, i32 %evl) ret %v } define @vpudiv_by_const_with_add_nxv4i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_const_with_add_nxv4i16: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 2 +; CHECK-NEXT: addi a1, a1, 1171 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 7 -; CHECK-NEXT: lui a0, 2 -; CHECK-NEXT: addi a0, a0, 1171 -; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmulhu.vx v9, v8, a1, v0.t ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t -; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t -; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t -; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t -; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmulhu.vx v10, v10, a0, v0.t +; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t +; CHECK-NEXT: vmv.v.i v10, 7 +; CHECK-NEXT: vsrl.vi v9, v9, 2, v0.t +; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret - %vec = insertelement undef, i16 7, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv4i16( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv4i16( %va, splat (i16 7), %m, i32 %evl) ret %v } define @vpudiv_by_const_with_add_nxv2i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpudiv_by_const_with_add_nxv2i32: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 149797 +; CHECK-NEXT: addi a1, a1, -1755 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 7 -; CHECK-NEXT: lui a0, 149797 -; CHECK-NEXT: addi a0, a0, -1755 -; CHECK-NEXT: vmulhu.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmulhu.vx v9, v8, a1, v0.t ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t -; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t -; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t -; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t -; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmulhu.vx v10, v10, a0, v0.t +; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t +; CHECK-NEXT: vmv.v.i v10, 7 +; CHECK-NEXT: vsrl.vi v9, v9, 2, v0.t +; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret - %vec = insertelement undef, i32 7, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv2i32( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv2i32( %va, splat (i32 7), %m, i32 %evl) ret %v } @@ -423,20 +385,18 @@ define @vpudiv_by_const_with_add_nxv1i64( % ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: ld a1, %lo(.LCPI23_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 7 -; CHECK-NEXT: vmulhu.vx v10, v8, a1, v0.t +; CHECK-NEXT: vmulhu.vx v9, v8, a1, v0.t ; 
CHECK-NEXT: li a0, -1 ; CHECK-NEXT: slli a0, a0, 63 -; CHECK-NEXT: vsub.vv v11, v8, v10, v0.t -; CHECK-NEXT: vmulhu.vx v11, v11, a0, v0.t -; CHECK-NEXT: vadd.vv v10, v11, v10, v0.t -; CHECK-NEXT: vsrl.vi v10, v10, 2, v0.t -; CHECK-NEXT: vmseq.vi v0, v9, 1, v0.t -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vsub.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmulhu.vx v10, v10, a0, v0.t +; CHECK-NEXT: vadd.vv v9, v10, v9, v0.t +; CHECK-NEXT: vmv.v.i v10, 7 +; CHECK-NEXT: vsrl.vi v9, v9, 2, v0.t +; CHECK-NEXT: vmseq.vi v0, v10, 1, v0.t +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret - %vec = insertelement undef, i64 7, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv1i64( %va, splat (i64 7), %m, i32 %evl) ret %v } @@ -446,7 +406,7 @@ define @vpsdiv_by_neg1_nxv8i8( %va, undef, i8 -1, i32 0 + %vec = insertelement poison, i8 -1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer %v = call @llvm.vp.sdiv.nxv8i8( %va, %splat, %m, i32 %evl) ret %v @@ -458,7 +418,7 @@ define @vpsdiv_by_neg1_nxv1i64( %va, undef, i64 -1, i32 0 + %vec = insertelement poison, i64 -1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) ret %v @@ -470,7 +430,7 @@ define @vpsdiv_by_neg1_nxv4i16( %va, undef, i16 -1, i32 0 + %vec = insertelement poison, i16 -1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer %v = call @llvm.vp.sdiv.nxv4i16( %va, %splat, %m, i32 %evl) ret %v @@ -482,7 +442,7 @@ define @vpsdiv_by_neg1_nxv2i32( %va, undef, i32 -1, i32 0 + %vec = insertelement poison, i32 -1, i32 0 %splat = shufflevector %vec, poison, zeroinitializer %v = call @llvm.vp.sdiv.nxv2i32( %va, %splat, %m, i32 %evl) ret %v @@ -497,7 +457,7 @@ define @vpsdiv_by_min_nxv8i8( %va, undef, i8 -128, i32 0 + %vec = insertelement poison, i8 -128, i32 0 %splat = shufflevector %vec, poison, zeroinitializer %v = call @llvm.vp.sdiv.nxv8i8( %va, %splat, %m, i32 %evl) ret %v @@ -513,7 +473,7 @@ define @vpsdiv_by_min_nxv1i64( %va, undef, i64 -9223372036854775808, i32 0 + %vec = insertelement poison, i64 -9223372036854775808, i32 0 %splat = shufflevector %vec, poison, zeroinitializer %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) ret %v @@ -528,7 +488,7 @@ define @vpsdiv_by_min_nxv4i16( %va, undef, i16 -32768, i32 0 + %vec = insertelement poison, i16 -32768, i32 0 %splat = shufflevector %vec, poison, zeroinitializer %v = call @llvm.vp.sdiv.nxv4i16( %va, %splat, %m, i32 %evl) ret %v @@ -543,7 +503,7 @@ define @vpsdiv_by_min_nxv2i32( %va, undef, i32 -2147483648, i32 0 + %vec = insertelement poison, i32 -2147483648, i32 0 %splat = shufflevector %vec, poison, zeroinitializer %v = call @llvm.vp.sdiv.nxv2i32( %va, %splat, %m, i32 %evl) ret %v @@ -570,9 +530,7 @@ define @vpsdiv_pow2_nxv4i16( %va, undef, i16 4, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv4i16( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv4i16( %va, splat (i16 4), %m, i32 %evl) ret %v } @@ -597,9 +555,7 @@ define @vpsdiv_pow2_nxv8i8( %va, undef, i8 4, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv8i8( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv8i8( %va, splat (i8 4), %m, i32 %evl) ret %v } @@ -624,9 +580,7 @@ define @vpsdiv_pow2_nxv2i32( %va, undef, i32 4, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv2i32( %va, %splat, %m, i32 
%evl) + %v = call @llvm.vp.sdiv.nxv2i32( %va, splat (i32 4), %m, i32 %evl) ret %v } @@ -653,9 +607,7 @@ define @vpsdiv_pow2_nxv1i64( %va, undef, i64 4, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv1i64( %va, splat (i64 4), %m, i32 %evl) ret %v } @@ -670,9 +622,7 @@ define @vpsdiv_const_no_ashr_nxv8i8( %va, undef, i8 3, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv8i8( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv8i8( %va, splat (i8 3), %m, i32 %evl) ret %v } @@ -688,9 +638,7 @@ define @vpsdiv_const_no_ashr_nxv4i16( %va, ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret - %vec = insertelement undef, i16 3, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv4i16( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv4i16( %va, splat (i16 3), %m, i32 %evl) ret %v } @@ -706,9 +654,7 @@ define @vpsdiv_const_no_ashr_nxv2i32( %va, ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret - %vec = insertelement undef, i32 3, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv2i32( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv2i32( %va, splat (i32 3), %m, i32 %evl) ret %v } @@ -725,9 +671,7 @@ define @vpsdiv_const_no_ashr_nxv1i64( %va, ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret - %vec = insertelement undef, i64 3, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv1i64( %va, splat (i64 3), %m, i32 %evl) ret %v } @@ -742,9 +686,7 @@ define @vpsdiv_const_ashr_nxv8i8( %va, undef, i8 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv8i8( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv8i8( %va, splat (i8 5), %m, i32 %evl) ret %v } @@ -760,9 +702,7 @@ define @vpsdiv_const_ashr_nxv4i16( %va, undef, i16 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv4i16( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv4i16( %va, splat (i16 5), %m, i32 %evl) ret %v } @@ -778,9 +718,7 @@ define @vpsdiv_const_ashr_nxv2i32( %va, undef, i32 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv2i32( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv2i32( %va, splat (i32 5), %m, i32 %evl) ret %v } @@ -797,9 +735,7 @@ define @vpsdiv_const_ashr_nxv1i64( %va, undef, i64 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv1i64( %va, splat (i64 5), %m, i32 %evl) ret %v } @@ -815,9 +751,7 @@ define @vpsdiv_const_add_ashr_nxv8i8( %va, undef, i8 7, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv8i8( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv8i8( %va, splat (i8 7), %m, i32 %evl) ret %v } @@ -834,9 +768,7 @@ define @vpsdiv_const_add_ashr_nxv4i16( %va, ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret - %vec = insertelement undef, i16 15, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv4i16( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv4i16( %va, splat 
(i16 15), %m, i32 %evl) ret %v } @@ -853,9 +785,7 @@ define @vpsdiv_const_add_ashr_nxv2i32( %va, ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret - %vec = insertelement undef, i32 7, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv2i32( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv2i32( %va, splat (i32 7), %m, i32 %evl) ret %v } @@ -875,9 +805,7 @@ define @vpsdiv_const_add_ashr_nxv1i64( %va, ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret - %vec = insertelement undef, i64 15, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv1i64( %va, splat (i64 15), %m, i32 %evl) ret %v } @@ -893,7 +821,7 @@ define @vpsdiv_const_sub_ashr_nxv8i8( %va, undef, i8 -7, i32 0 + %vec = insertelement poison, i8 -7, i32 0 %splat = shufflevector %vec, poison, zeroinitializer %v = call @llvm.vp.sdiv.nxv8i8( %va, %splat, %m, i32 %evl) ret %v @@ -912,7 +840,7 @@ define @vpsdiv_const_sub_ashr_nxv4i16( %va, ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret - %vec = insertelement undef, i16 -15, i32 0 + %vec = insertelement poison, i16 -15, i32 0 %splat = shufflevector %vec, poison, zeroinitializer %v = call @llvm.vp.sdiv.nxv4i16( %va, %splat, %m, i32 %evl) ret %v @@ -931,7 +859,7 @@ define @vpsdiv_const_sub_ashr_nxv2i32( %va, ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret - %vec = insertelement undef, i32 -7, i32 0 + %vec = insertelement poison, i32 -7, i32 0 %splat = shufflevector %vec, poison, zeroinitializer %v = call @llvm.vp.sdiv.nxv2i32( %va, %splat, %m, i32 %evl) ret %v @@ -953,7 +881,7 @@ define @vpsdiv_const_sub_ashr_nxv1i64( %va, ; CHECK-NEXT: vand.vi v9, v9, -1, v0.t ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret - %vec = insertelement undef, i64 -3, i32 0 + %vec = insertelement poison, i64 -3, i32 0 %splat = shufflevector %vec, poison, zeroinitializer %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) ret %v @@ -966,9 +894,7 @@ define @vpurem_by_max_nxv1i64( %va, undef, i64 18446744073709551615, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv1i64( %va, splat (i64 18446744073709551615), %m, i32 %evl) ret %v } @@ -979,9 +905,7 @@ define @vpurem_by_max_nxv4i16( %va, undef, i16 65535, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv4i16( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv4i16( %va, splat (i16 65535), %m, i32 %evl) ret %v } @@ -992,9 +916,7 @@ define @vpurem_by_max_nxv8i8( %va, undef, i8 255, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv8i8( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv8i8( %va, splat (i8 255), %m, i32 %evl) ret %v } @@ -1005,9 +927,7 @@ define @vpurem_by_max_nxv2i32( %va, undef, i32 4294967295, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv2i32( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv2i32( %va, splat (i32 4294967295), %m, i32 %evl) ret %v } @@ -1031,9 +951,7 @@ define @vpurem_by_const_nxv1i64( %va, undef, i64 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call 
@llvm.vp.urem.nxv1i64( %va, splat (i64 5), %m, i32 %evl) ret %v } @@ -1042,8 +960,8 @@ define @vpurem_by_const_nxv4i16( %va, @vpurem_by_const_nxv4i16( %va, undef, i16 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv4i16( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv4i16( %va, splat (i16 5), %m, i32 %evl) ret %v } @@ -1065,8 +981,8 @@ define @vpurem_by_const_nxv8i8( %va, @vpurem_by_const_nxv8i8( %va, undef, i8 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv8i8( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv8i8( %va, splat (i8 5), %m, i32 %evl) ret %v } @@ -1087,8 +1001,8 @@ define @vpurem_by_const_nxv2i32( %va, @vpurem_by_const_nxv2i32( %va, undef, i32 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv2i32( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv2i32( %va, splat (i32 5), %m, i32 %evl) ret %v } @@ -1121,9 +1033,7 @@ define @vpsrem_by_const_nxv1i64( %va, undef, i64 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv1i64( %va, splat (i64 5), %m, i32 %evl) ret %v } @@ -1142,9 +1052,7 @@ define @vpsrem_by_const_nxv4i16( %va, undef, i16 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv4i16( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv4i16( %va, splat (i16 5), %m, i32 %evl) ret %v } @@ -1162,9 +1070,7 @@ define @vpsrem_by_const_nxv8i8( %va, undef, i8 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv8i8( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv8i8( %va, splat (i8 5), %m, i32 %evl) ret %v } @@ -1183,9 +1089,7 @@ define @vpsrem_by_const_nxv2i32( %va, undef, i32 5, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv2i32( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv2i32( %va, splat (i32 5), %m, i32 %evl) ret %v } @@ -1210,9 +1114,7 @@ define @vpudiv_by_1_nxv8i8( %va, undef, i8 1, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv8i8( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv8i8( %va, splat (i8 1), %m, i32 %evl) ret %v } @@ -1237,9 +1139,7 @@ define @vpudiv_by_1_nxv4i16( %va, undef, i16 1, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv4i16( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv4i16( %va, splat (i16 1), %m, i32 %evl) ret %v } @@ -1265,9 +1165,7 @@ define @vpudiv_by_1_nxv2i32( %va, undef, i32 1, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv2i32( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv2i32( %va, splat (i32 1), %m, i32 %evl) ret %v } @@ -1294,9 +1192,7 @@ define @vpudiv_by_1_nxv1i64( %va, undef, i64 1, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv1i64( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv1i64( %va, splat (i64 1), %m, i32 %evl) ret %v } @@ -1321,9 +1217,7 @@ define @vpsdiv_by_1_nxv8i8( %va, undef, i8 1, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv8i8( %va, %splat, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv8i8( %va, splat (i8 1), %m, i32 %evl) ret %v } @@ -1348,9 +1242,7 @@ define @vpsdiv_by_1_nxv4i16( %va, undef, i16 1, i32 0 - %splat = shufflevector %vec, poison, zeroinitializer - %v = 
@@ -1348,9 +1242,7 @@ define <vscale x 4 x i16> @vpsdiv_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 4 x i16> undef, i16 1, i32 0
-  %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
-  %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+  %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 1), <vscale x 4 x i1> %m, i32 %evl)
   ret %v
 }
@@ -1376,9 +1268,7 @@ define <vscale x 2 x i32> @vpsdiv_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 2 x i32> undef, i32 1, i32 0
-  %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-  %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+  %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 1), <vscale x 2 x i1> %m, i32 %evl)
   ret %v
 }
@@ -1405,9 +1295,7 @@ define <vscale x 1 x i64> @vpsdiv_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 1 x i64> undef, i64 1, i32 0
-  %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-  %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 1), <vscale x 1 x i1> %m, i32 %evl)
   ret %v
 }
@@ -1418,9 +1306,7 @@ define <vscale x 8 x i8> @vpurem_by_1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 8 x i8> undef, i8 1, i32 0
-  %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
-  %v = call <vscale x 8 x i8> @llvm.vp.urem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+  %v = call <vscale x 8 x i8> @llvm.vp.urem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 1), <vscale x 8 x i1> %m, i32 %evl)
   ret %v
 }
@@ -1431,9 +1317,7 @@ define <vscale x 4 x i16> @vpurem_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 4 x i16> undef, i16 1, i32 0
-  %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
-  %v = call <vscale x 4 x i16> @llvm.vp.urem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+  %v = call <vscale x 4 x i16> @llvm.vp.urem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 1), <vscale x 4 x i1> %m, i32 %evl)
   ret %v
 }
@@ -1444,9 +1328,7 @@ define <vscale x 2 x i32> @vpurem_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 2 x i32> undef, i32 1, i32 0
-  %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-  %v = call <vscale x 2 x i32> @llvm.vp.urem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+  %v = call <vscale x 2 x i32> @llvm.vp.urem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 1), <vscale x 2 x i1> %m, i32 %evl)
   ret %v
 }
@@ -1457,9 +1339,7 @@ define <vscale x 1 x i64> @vpurem_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 1 x i64> undef, i64 1, i32 0
-  %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-  %v = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 1), <vscale x 1 x i1> %m, i32 %evl)
   ret %v
 }
@@ -1486,9 +1366,7 @@ define <vscale x 8 x i8> @vpsrem_by_1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 8 x i8> undef, i8 1, i32 0
-  %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
-  %v = call <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+  %v = call <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 1), <vscale x 8 x i1> %m, i32 %evl)
   ret %v
 }
@@ -1515,9 +1393,7 @@ define <vscale x 4 x i16> @vpsrem_by_1_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 4 x i16> undef, i16 1, i32 0
-  %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
-  %v = call <vscale x 4 x i16> @llvm.vp.srem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+  %v = call <vscale x 4 x i16> @llvm.vp.srem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 1), <vscale x 4 x i1> %m, i32 %evl)
   ret %v
 }
@@ -1545,9 +1421,7 @@ define <vscale x 2 x i32> @vpsrem_by_1_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 2 x i32> undef, i32 1, i32 0
-  %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-  %v = call <vscale x 2 x i32> @llvm.vp.srem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+  %v = call <vscale x 2 x i32> @llvm.vp.srem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 1), <vscale x 2 x i1> %m, i32 %evl)
   ret %v
 }
@@ -1576,9 +1450,7 @@ define <vscale x 1 x i64> @vpsrem_by_1_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 1 x i64> undef, i64 1, i32 0
-  %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-  %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 1), <vscale x 1 x i1> %m, i32 %evl)
   ret %v
 }
@@ -1606,7 +1478,7 @@ define <vscale x 8 x i8> @vpsrem_by_neg1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 8 x i8> undef, i8 -1, i32 0
+  %vec = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
   %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
   %v = call <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
   ret %v
@@ -1636,7 +1508,7 @@ define <vscale x 4 x i16> @vpsrem_by_neg1_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 4 x i16> undef, i16 -1, i32 0
+  %vec = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
   %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
   %v = call <vscale x 4 x i16> @llvm.vp.srem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
   ret %v
@@ -1667,7 +1539,7 @@ define <vscale x 2 x i32> @vpsrem_by_neg1_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
+  %vec = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
   %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
   %v = call <vscale x 2 x i32> @llvm.vp.srem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
   ret %v
@@ -1699,7 +1571,7 @@ define <vscale x 1 x i64> @vpsrem_by_neg1_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 1 x i64> undef, i64 -1, i32 0
+  %vec = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
   %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
   %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
   ret %v
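
The tests above pin down the udiv/urem-by-constant strength reduction: rather than emitting a real division, the combine multiplies by a precomputed magic constant and shifts the high half down. As a scalar sketch of the identity involved (the constant 0xCCCCCCCD and shift amount 34 are the standard 32-bit magic pair for dividing by 5; they are illustrative here, not taken from the patch):

  #include <cassert>
  #include <cstdint>

  // n / 5 == (n * 0xCCCCCCCD) >> 34 for every uint32_t n: a multiply-high
  // plus a shift, roughly the vmulhu + vsrl shape the vector lowering aims for.
  static uint32_t udiv5(uint32_t n) {
    return static_cast<uint32_t>((static_cast<uint64_t>(n) * 0xCCCCCCCDull) >> 34);
  }

  int main() {
    for (uint32_t n : {0u, 1u, 4u, 5u, 6u, 99u, 0xFFFFFFFFu})
      assert(udiv5(n) == n / 5);
  }

The urem tests reuse the same quotient: n % 5 is then n - 5 * (n / 5).
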
From 12cc971715cbb3de9121a510356c57a8d06e8ec4 Mon Sep 17 00:00:00 2001
From: Jesse Huang
Date: Thu, 20 Feb 2025 05:34:01 -0800
Subject: [PATCH 09/13] !fixup didn't merge negative constants

---
 .../CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll |  4 +-
 llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll | 64 +++++--------------
 2 files changed, 17 insertions(+), 51 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll
index d9bb93248c44e..2972df3e1cf7a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const-zve64.ll
@@ -70,9 +70,7 @@ define <vscale x 1 x i64> @vpsdiv_const_sub_ashr_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl) {
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT:    vdiv.vx v8, v8, a1, v0.t
 ; CHECK-NEXT:    ret
-  %vec = insertelement <vscale x 1 x i64> poison, i64 -3, i32 0
-  %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-  %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 -3), <vscale x 1 x i1> %m, i32 %evl)
   ret %v
 }

diff --git a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll
index 5ef604132a64c..c5159f7789d80 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpdiv-by-const.ll
@@ -406,9 +406,7 @@ define <vscale x 8 x i8> @vpsdiv_by_neg1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
-  %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
-  %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+  %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 -1), <vscale x 8 x i1> %m, i32 %evl)
   ret %v
 }
@@ -418,9 +416,7 @@ define <vscale x 1 x i64> @vpsdiv_by_neg1_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
-  %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-  %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 -1), <vscale x 1 x i1> %m, i32 %evl)
   ret %v
 }
@@ -430,9 +426,7 @@ define <vscale x 4 x i16> @vpsdiv_by_neg1_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
-  %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
-  %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+  %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 -1), <vscale x 4 x i1> %m, i32 %evl)
   ret %v
 }
@@ -442,9 +436,7 @@ define <vscale x 2 x i32> @vpsdiv_by_neg1_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
-  %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-  %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+  %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 -1), <vscale x 2 x i1> %m, i32 %evl)
   ret %v
 }
@@ -457,9 +449,7 @@ define <vscale x 8 x i8> @vpsdiv_by_min_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 8 x i8> poison, i8 -128, i32 0
-  %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
-  %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+  %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 -128), <vscale x 8 x i1> %m, i32 %evl)
   ret %v
 }
@@ -473,9 +463,7 @@ define <vscale x 1 x i64> @vpsdiv_by_min_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 1 x i64> poison, i64 -9223372036854775808, i32 0
-  %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-  %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 -9223372036854775808), <vscale x 1 x i1> %m, i32 %evl)
   ret %v
 }
@@ -488,9 +476,7 @@ define <vscale x 4 x i16> @vpsdiv_by_min_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 4 x i16> poison, i16 -32768, i32 0
-  %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
-  %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+  %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 -32768), <vscale x 4 x i1> %m, i32 %evl)
   ret %v
 }
@@ -503,9 +489,7 @@ define <vscale x 2 x i32> @vpsdiv_by_min_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 2 x i32> poison, i32 -2147483648, i32 0
-  %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-  %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+  %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 -2147483648), <vscale x 2 x i1> %m, i32 %evl)
   ret %v
 }
@@ -821,9 +805,7 @@ define <vscale x 8 x i8> @vpsdiv_const_sub_ashr_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 8 x i8> poison, i8 -7, i32 0
-  %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
-  %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+  %v = call <vscale x 8 x i8> @llvm.vp.sdiv.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 -7), <vscale x 8 x i1> %m, i32 %evl)
   ret %v
 }
@@ -840,9 +822,7 @@ define <vscale x 4 x i16> @vpsdiv_const_sub_ashr_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl) {
 ; CHECK-NEXT:    vand.vi v9, v9, -1, v0.t
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
-  %vec = insertelement <vscale x 4 x i16> poison, i16 -15, i32 0
-  %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
-  %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+  %v = call <vscale x 4 x i16> @llvm.vp.sdiv.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 -15), <vscale x 4 x i1> %m, i32 %evl)
   ret %v
 }
@@ -859,9 +839,7 @@ define <vscale x 2 x i32> @vpsdiv_const_sub_ashr_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl) {
 ; CHECK-NEXT:    vand.vi v9, v9, -1, v0.t
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
-  %vec = insertelement <vscale x 2 x i32> poison, i32 -7, i32 0
-  %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-  %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+  %v = call <vscale x 2 x i32> @llvm.vp.sdiv.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 -7), <vscale x 2 x i1> %m, i32 %evl)
   ret %v
 }
@@ -881,9 +859,7 @@ define <vscale x 1 x i64> @vpsdiv_const_sub_ashr_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl) {
 ; CHECK-NEXT:    vand.vi v9, v9, -1, v0.t
 ; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    ret
-  %vec = insertelement <vscale x 1 x i64> poison, i64 -3, i32 0
-  %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-  %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 -3), <vscale x 1 x i1> %m, i32 %evl)
   ret %v
 }
@@ -1478,9 +1454,7 @@ define <vscale x 8 x i8> @vpsrem_by_neg1_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
-  %splat = shufflevector <vscale x 8 x i8> %vec, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
-  %v = call <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %splat, <vscale x 8 x i1> %m, i32 %evl)
+  %v = call <vscale x 8 x i8> @llvm.vp.srem.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> splat (i8 -1), <vscale x 8 x i1> %m, i32 %evl)
   ret %v
 }
@@ -1508,9 +1482,7 @@ define <vscale x 4 x i16> @vpsrem_by_neg1_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
-  %splat = shufflevector <vscale x 4 x i16> %vec, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
-  %v = call <vscale x 4 x i16> @llvm.vp.srem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %splat, <vscale x 4 x i1> %m, i32 %evl)
+  %v = call <vscale x 4 x i16> @llvm.vp.srem.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 -1), <vscale x 4 x i1> %m, i32 %evl)
   ret %v
 }
@@ -1539,9 +1511,7 @@ define <vscale x 2 x i32> @vpsrem_by_neg1_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
-  %splat = shufflevector <vscale x 2 x i32> %vec, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-  %v = call <vscale x 2 x i32> @llvm.vp.srem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
+  %v = call <vscale x 2 x i32> @llvm.vp.srem.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> splat (i32 -1), <vscale x 2 x i1> %m, i32 %evl)
   ret %v
 }
@@ -1571,9 +1541,7 @@ define <vscale x 1 x i64> @vpsrem_by_neg1_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl) {
-  %vec = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
-  %splat = shufflevector <vscale x 1 x i64> %vec, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-  %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %splat, <vscale x 1 x i1> %m, i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> splat (i64 -1), <vscale x 1 x i1> %m, i32 %evl)
   ret %v
 }
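
This fixup covers the signed negative-constant cases: the same multiply-high scheme, plus a sign-correction add and a final negate when the divisor is negative. A scalar sketch for divide by -3 (0x55555556 is the standard 32-bit magic for 3; the helper name is illustrative, not from the patch):

  #include <cassert>
  #include <cstdint>

  static int32_t sdiv_neg3(int32_t n) {
    // Multiply-high by the magic value for 3 ...
    int32_t q = static_cast<int32_t>((static_cast<int64_t>(n) * 0x55555556LL) >> 32);
    // ... add 1 when the quotient is negative so it rounds toward zero ...
    q += static_cast<uint32_t>(q) >> 31;
    // ... then fold in the divisor's sign.
    return -q;
  }

  int main() {
    for (int32_t n : {-2147483647 - 1, -7, -3, -1, 0, 1, 3, 7, 2147483647})
      assert(sdiv_neg3(n) == n / -3);
  }
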
From 69c6cbce5a06a1d730e5b2afc9e1ed81abd61b42 Mon Sep 17 00:00:00 2001
From: Jesse Huang
Date: Tue, 25 Feb 2025 00:54:41 -0800
Subject: [PATCH 10/13] Merge BuildVPU(S)DIV into BuildU(S)DIV

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 58 +++++--------------
 1 file changed, 16 insertions(+), 42 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5cd17a203dbf2..bee77824c1d8b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -566,10 +566,8 @@
     SDValue visitVPUDIV(SDNode *N);
     SDValue visitVPUDIVLike(SDValue N0, SDValue N1, SDNode *N);
-    SDValue BuildVPUDIV(SDNode *N);
     SDValue visitVPSDIV(SDNode *N);
     SDValue visitVPSDIVLike(SDValue N0, SDValue N1, SDNode *N);
-    SDValue BuildVPSDIV(SDNode *N);
     SDValue visitVPREM(SDNode *N);

     SDValue XformToShuffleWithZero(SDNode *N);
@@ -27280,42 +27278,6 @@ SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
   return SDValue();
 }

-SDValue DAGCombiner::BuildVPUDIV(SDNode *N) {
-  // when optimising for minimum size, we don't want to expand a div to a mul
-  // and a shift.
-  if (DAG.getMachineFunction().getFunction().hasMinSize())
-    return SDValue();
-
-  SmallVector<SDNode *, 8> Built;
-  if (SDValue S = TLI.BuildVPUDIV(N, DAG, LegalOperations, Built)) {
-    for (SDNode *N : Built)
-      AddToWorklist(N);
-    return S;
-  }
-
-  return SDValue();
-}
-
-/// Given an ISD::VP_SDIV node expressing a divide by constant, return
-/// a DAG expression to select that will generate the same value by multiplying
-/// by a magic number.
-/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
-SDValue DAGCombiner::BuildVPSDIV(SDNode *N) {
-  // when optimising for minimum size, we don't want to expand a div to a mul
-  // and a shift.
-  if (DAG.getMachineFunction().getFunction().hasMinSize())
-    return SDValue();
-
-  SmallVector<SDNode *, 8> Built;
-  if (SDValue S = TLI.BuildVPSDIV(N, DAG, LegalOperations, Built)) {
-    for (SDNode *N : Built)
-      AddToWorklist(N);
-    return S;
-  }
-
-  return SDValue();
-}
-
 SDValue DAGCombiner::visitVPUDIV(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -27417,7 +27379,7 @@ SDValue DAGCombiner::visitVPUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
   if (isConstantOrConstantVector(N1) &&
       !TLI.isIntDivCheap(N->getValueType(0), Attr)) {
-    if (SDValue Op = BuildVPUDIV(N))
+    if (SDValue Op = BuildUDIV(N))
       return Op;
   }
   return SDValue();
@@ -27537,7 +27499,7 @@ SDValue DAGCombiner::visitVPSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
   if (isConstantOrConstantVector(N1) &&
       !TLI.isIntDivCheap(N->getValueType(0), Attr))
-    if (SDValue Op = BuildVPSDIV(N))
+    if (SDValue Op = BuildSDIV(N))
       return Op;

   return SDValue();
@@ -28640,7 +28602,13 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
     return SDValue();

   SmallVector<SDNode *, 8> Built;
-  if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
+  SDValue S;
+  if (N->isVPOpcode())
+    S = TLI.BuildVPSDIV(N, DAG, LegalOperations, Built);
+  else
+    S = TLI.BuildSDIV(N, DAG, LegalOperations, LegalTypes, Built);
+
+  if (S) {
     for (SDNode *N : Built)
       AddToWorklist(N);
     return S;
@@ -28681,7 +28649,13 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
     return SDValue();

   SmallVector<SDNode *, 8> Built;
-  if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
+  SDValue S;
+  if (N->isVPOpcode())
+    S = TLI.BuildVPUDIV(N, DAG, LegalOperations, Built);
+  else
+    S = TLI.BuildUDIV(N, DAG, LegalOperations, LegalTypes, Built);
+
+  if (S) {
     for (SDNode *N : Built)
       AddToWorklist(N);
     return S;
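
With the two VP-specific wrappers gone, each direction has a single entry point and the VP/non-VP split happens inside it. Pieced together from the hunks above, the merged BuildUDIV has roughly this shape (simplified here with a ternary; the BuildSDIV side is symmetric):

  SDValue DAGCombiner::BuildUDIV(SDNode *N) {
    // When optimising for minimum size, don't expand a div into a mul and a
    // shift; this early-out is now shared by the VP and non-VP paths.
    if (DAG.getMachineFunction().getFunction().hasMinSize())
      return SDValue();

    SmallVector<SDNode *, 8> Built;
    SDValue S = N->isVPOpcode()
                    ? TLI.BuildVPUDIV(N, DAG, LegalOperations, Built)
                    : TLI.BuildUDIV(N, DAG, LegalOperations, LegalTypes, Built);
    if (!S)
      return SDValue();
    for (SDNode *B : Built)
      AddToWorklist(B);
    return S;
  }
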
From e2c0516f145de6bace66874eeb2c81a22faa33ad Mon Sep 17 00:00:00 2001
From: Jesse Huang
Date: Tue, 25 Feb 2025 01:05:12 -0800
Subject: [PATCH 11/13] !fixup NoOpaques

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bee77824c1d8b..d185115d07261 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -27321,7 +27321,7 @@ SDValue DAGCombiner::visitVPUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
   EVT VT = N->getValueType(0);

   // fold (vp.udiv x, (1 << c)) -> vp.lshr(x, c)
-  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+  if (isConstantOrConstantVector(N1, /*NoOpaques=*/true) &&
       DAG.isKnownToBeAPowerOfTwo(N1)) {
     SDValue LogBase2 = BuildLogBase2(N1, DL);
     AddToWorklist(LogBase2.getNode());
@@ -27337,7 +27337,7 @@ SDValue DAGCombiner::visitVPUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
   if (N1.getOpcode() == ISD::VP_SHL && N1->getOperand(2) == Mask &&
       N1->getOperand(3) == VL) {
     SDValue N10 = N1.getOperand(0);
-    if (isConstantOrConstantVector(N10, /*NoOpaques=*/ true) &&
+    if (isConstantOrConstantVector(N10, /*NoOpaques=*/true) &&
         DAG.isKnownToBeAPowerOfTwo(N10)) {
       SDValue LogBase2 = BuildLogBase2(N10, DL);
       AddToWorklist(LogBase2.getNode());

From f2df824aa523b750c204d45292a1e8d818b361b3 Mon Sep 17 00:00:00 2001
From: Jesse Huang
Date: Wed, 26 Feb 2025 00:34:34 -0800
Subject: [PATCH 12/13] Remove improper fixme and unreachable cases

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp    |  1 -
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 11 -----------
 2 files changed, 12 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d185115d07261..ecb428bff27e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -27487,7 +27487,6 @@ SDValue DAGCombiner::visitVPSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
     SDValue Zero = DAG.getConstant(0, DL, VT);
     SDValue Sub = DAG.getNode(ISD::VP_SUB, DL, VT, Zero, Sra, Mask, VL);

-    // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
     SDValue IsNeg = DAG.getSetCCVP(DL, CCVT, N1, Zero, ISD::SETLT, Mask, VL);
     SDValue Res = DAG.getNode(ISD::VP_SELECT, DL, VT, IsNeg, Sub, Sra, VL);
     return Res;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b7846212a94ab..a558cf0dbe8ae 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6575,12 +6575,6 @@ SDValue TargetLowering::BuildVPSDIV(SDNode *N, SelectionDAG &DAG,
     Factor = DAG.getSplatVector(VT, DL, Factors[0]);
     Shift = DAG.getSplatVector(ShVT, DL, Shifts[0]);
     ShiftMask = DAG.getSplatVector(VT, DL, ShiftMasks[0]);
-  } else {
-    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
-    MagicFactor = MagicFactors[0];
-    Factor = Factors[0];
-    Shift = Shifts[0];
-    ShiftMask = ShiftMasks[0];
   }

   // Multiply the numerator (operand 0) by the magic value.
@@ -6911,11 +6905,6 @@ SDValue TargetLowering::BuildVPUDIV(SDNode *N, SelectionDAG &DAG,
     MagicFactor = DAG.getSplatVector(VT, DL, MagicFactors[0]);
     NPQFactor = DAG.getSplatVector(VT, DL, NPQFactors[0]);
     PostShift = DAG.getSplatVector(ShVT, DL, PostShifts[0]);
-  } else {
-    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
-    PreShift = PreShifts[0];
-    MagicFactor = MagicFactors[0];
-    PostShift = PostShifts[0];
   }

   SDValue Q = N0;
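
For context, the VP_SUB/VP_SELECT sequence visible in the DAGCombiner hunk above is the signed divide-by-power-of-two path: shift the dividend right arithmetically, then negate the quotient when the divisor is negative. A scalar sketch of the same computation (assumes a 32-bit int, a divisor of the form plus or minus 2^k other than INT_MIN, and the GCC/Clang __builtin_ctz intrinsic; all names are illustrative):

  #include <cassert>

  static int sdiv_pow2(int n, int d) {
    int ad = d < 0 ? -d : d;                        // |d| == 2^k
    int k = __builtin_ctz(static_cast<unsigned>(ad));
    // Add 2^k - 1 to negative dividends so the arithmetic shift rounds
    // toward zero; this plays the role of the Sra value the combine builds.
    int sra = (n + ((n >> 31) & (ad - 1))) >> k;
    return d < 0 ? -sra : sra;                      // the VP_SELECT of Sub vs. Sra
  }

  int main() {
    for (int n : {-9, -8, -1, 0, 1, 7, 8, 9})
      for (int d : {2, -2, 8, -8})
        assert(sdiv_pow2(n, d) == n / d);
  }
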
From f8c91e958b9252fa8f9542ec2364476e20a45f34 Mon Sep 17 00:00:00 2001
From: Jesse Huang
Date: Fri, 28 Mar 2025 00:10:21 -0700
Subject: [PATCH 13/13] !fixup comments

---
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 6 ++++--
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp      | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a558cf0dbe8ae..df6c2abef04f9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6566,7 +6566,8 @@ SDValue TargetLowering::BuildVPSDIV(SDNode *N, SelectionDAG &DAG,
     Factor = DAG.getBuildVector(VT, DL, Factors);
     Shift = DAG.getBuildVector(ShVT, DL, Shifts);
     ShiftMask = DAG.getBuildVector(VT, DL, ShiftMasks);
-  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
+  } else {
+    assert(N1.getOpcode() == ISD::SPLAT_VECTOR && "Expected a splat_vector");
     assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
            Shifts.size() == 1 && ShiftMasks.size() == 1 &&
            "Expected matchUnaryPredicate to return one element for scalable "
@@ -6897,7 +6898,8 @@ SDValue TargetLowering::BuildVPUDIV(SDNode *N, SelectionDAG &DAG,
     MagicFactor = DAG.getBuildVector(VT, DL, MagicFactors);
     NPQFactor = DAG.getBuildVector(VT, DL, NPQFactors);
     PostShift = DAG.getBuildVector(ShVT, DL, PostShifts);
-  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
+  } else {
+    assert(N1.getOpcode() == ISD::SPLAT_VECTOR && "Expected a splat_vector");
     assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
            NPQFactors.size() == 1 && PostShifts.size() == 1 &&
            "Expected matchUnaryPredicate to return one for scalable vectors");
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index dc8603a5376cf..ba4aaf36a0650 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1286,7 +1286,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) {
       setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
     } else {
-      setOperationAction({ISD::VP_MULHU, ISD::VP_MULHS}, VT, Expand);
+      setOperationAction({ISD::VP_MULHS, ISD::VP_MULHU}, VT, Expand);
     }
     setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS,