From e46afef25dd6f34c4d5b7f27885f3b69175e5091 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 30 Jan 2025 12:06:45 +0800 Subject: [PATCH 1/4] [RISCV] Select mask operands as virtual registers and eliminate vmv0 This is another attempt at #88496 to keep mask operands in SSA after instruction selection. Previously we selected the mask operands into vmv0, a singleton register class with exactly one register, V0. But the register allocator doesn't really support singleton register classes and we ran into errors like "ran out of registers during register allocation in function". This patch avoids the problem by introducing a pass just before register allocation that converts any use of vmv0 to a copy to $v0, i.e. what isel currently does today. That way the register allocator doesn't need to deal with the singleton register class, but we still get the benefits of having the mask registers in SSA throughout the backend: - This allows RISCVVLOptimizer to reduce the VLs of instructions that define mask registers - It enables CSE and code sinking in more places - It removes the need to peek through mask copies in RISCVISelDAGToDAG and keep track of V0 defs in RISCVVectorPeephole As a follow up, we can move the elimination pass to after phi elimination and outside of SSA, which would unblock the pre-RA scheduler around masked pseudos. This might also help the issue that RISCVVectorMaskDAGMutation tries to solve. 
--- llvm/lib/Target/RISCV/CMakeLists.txt | 1 + llvm/lib/Target/RISCV/RISCV.h | 3 + llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 107 +- .../Target/RISCV/RISCVInstrInfoVPseudos.td | 92 +- .../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 36 +- .../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 408 ++++---- llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td | 70 +- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 3 + .../lib/Target/RISCV/RISCVVMV0Elimination.cpp | 154 +++ llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 65 +- .../instruction-select/rvv/select.mir | 110 +- llvm/test/CodeGen/RISCV/O0-pipeline.ll | 1 + llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 + llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll | 20 +- llvm/test/CodeGen/RISCV/rvv/commutable.ll | 69 +- .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 32 +- .../RISCV/rvv/fixed-vectors-nearbyint-vp.ll | 76 +- .../RISCV/rvv/fixed-vectors-trunc-vp.ll | 953 ++++++++++++------ .../CodeGen/RISCV/rvv/fixed-vectors-vpload.ll | 34 +- .../RISCV/rvv/fixed-vectors-vselect.ll | 24 +- llvm/test/CodeGen/RISCV/rvv/floor-vp.ll | 20 +- llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll | 6 +- llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll | 6 +- .../CodeGen/RISCV/rvv/fnearbyint-sdnode.ll | 70 +- llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll | 46 +- .../CodeGen/RISCV/rvv/implicit-def-copy.ll | 4 +- .../test/CodeGen/RISCV/rvv/mask-reg-alloc.mir | 6 +- llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll | 58 +- .../RISCV/rvv/pass-fast-math-flags-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/round-vp.ll | 20 +- llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll | 20 +- llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll | 20 +- .../RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir | 80 +- .../RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll | 152 +-- .../rvv/strided-vpload-vpstore-output.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll | 60 +- .../RISCV/rvv/vector-extract-last-active.ll | 96 +- .../RISCV/rvv/vector-reassociations.ll | 4 +- 
llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll | 178 ++-- llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll | 109 +- llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll | 16 +- .../RISCV/rvv/vleff-vlseg2ff-output.ll | 8 +- .../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vpload.ll | 18 +- .../CodeGen/RISCV/rvv/vreductions-fp-vp.ll | 68 +- llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll | 83 +- 52 files changed, 1887 insertions(+), 1616 deletions(-) create mode 100644 llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index 98d3615ebab58..9b23a5ab521c8 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -63,6 +63,7 @@ add_llvm_target(RISCVCodeGen RISCVVectorMaskDAGMutation.cpp RISCVVectorPeephole.cpp RISCVVLOptimizer.cpp + RISCVVMV0Elimination.cpp RISCVZacasABIFix.cpp GISel/RISCVCallLowering.cpp GISel/RISCVInstructionSelector.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index b1aee98739e85..851eea1352852 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -107,6 +107,9 @@ void initializeRISCVPreLegalizerCombinerPass(PassRegistry &); FunctionPass *createRISCVVLOptimizerPass(); void initializeRISCVVLOptimizerPass(PassRegistry &); + +FunctionPass *createRISCVVMV0EliminationPass(); +void initializeRISCVVMV0EliminationPass(PassRegistry &); } // namespace llvm #endif diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index ec2e8f1d50264..fb2c5c62ef871 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -254,7 +254,6 @@ void 
RISCVDAGToDAGISel::addVectorLoadStoreOperands( bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl &Operands, bool IsLoad, MVT *IndexVT) { SDValue Chain = Node->getOperand(0); - SDValue Glue; Operands.push_back(Node->getOperand(CurOp++)); // Base pointer. @@ -265,11 +264,8 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands( } if (IsMasked) { - // Mask needs to be copied to V0. SDValue Mask = Node->getOperand(CurOp++); - Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue()); - Glue = Chain.getValue(1); - Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType())); + Operands.push_back(Mask); } SDValue VL; selectVLOp(Node->getOperand(CurOp++), VL); @@ -291,8 +287,6 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands( } Operands.push_back(Chain); // Chain. - if (Glue) - Operands.push_back(Glue); } void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked, @@ -1844,19 +1838,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { return; } - // Mask needs to be copied to V0. - SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, - RISCV::V0, Mask, SDValue()); - SDValue Glue = Chain.getValue(1); - SDValue V0 = CurDAG->getRegister(RISCV::V0, VT); - if (IsCmpConstant) { SDValue Imm = selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget); ReplaceNode(Node, CurDAG->getMachineNode( VMSGTMaskOpcode, DL, VT, - {MaskedOff, Src1, Imm, V0, VL, SEW, Glue})); + {MaskedOff, Src1, Imm, Mask, VL, SEW})); return; } @@ -1867,7 +1855,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // the agnostic result can be either undisturbed or all 1. SDValue Cmp = SDValue( CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT, - {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}), + {MaskedOff, Src1, Src2, Mask, VL, SEW}), 0); // vmxor.mm vd, vd, v0 is used to update active value. 
ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT, @@ -3287,12 +3275,10 @@ static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, return false; assert(RISCVII::hasVLOp(TSFlags)); - bool HasGlueOp = User->getGluedNode() != nullptr; - unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1; + unsigned ChainOpIdx = User->getNumOperands() - 1; bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other; bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags); - unsigned VLIdx = - User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2; + unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2; const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1); if (UserOpNo == VLIdx) @@ -3759,43 +3745,7 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { return false; } -// After ISel, a vector pseudo's mask will be copied to V0 via a CopyToReg -// that's glued to the pseudo. This tries to look up the value that was copied -// to V0. -static SDValue getMaskSetter(SDValue MaskOp, SDValue GlueOp) { - // Check that we're using V0 as a mask register. - if (!isa(MaskOp) || - cast(MaskOp)->getReg() != RISCV::V0) - return SDValue(); - - // The glued user defines V0. - const auto *Glued = GlueOp.getNode(); - - if (!Glued || Glued->getOpcode() != ISD::CopyToReg) - return SDValue(); - - // Check that we're defining V0 as a mask register. - if (!isa(Glued->getOperand(1)) || - cast(Glued->getOperand(1))->getReg() != RISCV::V0) - return SDValue(); - - SDValue MaskSetter = Glued->getOperand(2); - - // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came - // from an extract_subvector or insert_subvector. 
- if (MaskSetter->isMachineOpcode() && - MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS) - MaskSetter = MaskSetter->getOperand(0); - - return MaskSetter; -} - -static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) { - // Check the instruction defining V0; it needs to be a VMSET pseudo. - SDValue MaskSetter = getMaskSetter(MaskOp, GlueOp); - if (!MaskSetter) - return false; - +static bool usesAllOnesMask(SDValue MaskOp) { const auto IsVMSet = [](unsigned Opc) { return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 || Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 || @@ -3806,14 +3756,7 @@ static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) { // TODO: Check that the VMSET is the expected bitwidth? The pseudo has // undefined behaviour if it's the wrong bitwidth, so we could choose to // assume that it's all-ones? Same applies to its VL. - return MaskSetter->isMachineOpcode() && - IsVMSet(MaskSetter.getMachineOpcode()); -} - -// Return true if we can make sure mask of N is all-ones mask. -static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) { - return usesAllOnesMask(N->getOperand(MaskOpIdx), - N->getOperand(N->getNumOperands() - 1)); + return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode()); } static bool isImplicitDef(SDValue V) { @@ -3829,9 +3772,7 @@ static bool isImplicitDef(SDValue V) { } // Optimize masked RVV pseudo instructions with a known all-ones mask to their -// corresponding "unmasked" pseudo versions. The mask we're interested in will -// take the form of a V0 physical register operand, with a glued -// register-setting instruction. +// corresponding "unmasked" pseudo versions. 
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) { const RISCV::RISCVMaskedPseudoInfo *I = RISCV::getMaskedPseudoInfo(N->getMachineOpcode()); @@ -3839,7 +3780,7 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) { return false; unsigned MaskOpIdx = I->MaskOpIdx; - if (!usesAllOnesMask(N, MaskOpIdx)) + if (!usesAllOnesMask(N->getOperand(MaskOpIdx))) return false; // There are two classes of pseudos in the table - compares and @@ -3863,18 +3804,13 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) { // Skip the passthru operand at index 0 if the unmasked don't have one. bool ShouldSkip = !HasPassthru && MaskedHasPassthru; for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) { - // Skip the mask, and the Glue. + // Skip the mask SDValue Op = N->getOperand(I); - if (I == MaskOpIdx || Op.getValueType() == MVT::Glue) + if (I == MaskOpIdx) continue; Ops.push_back(Op); } - // Transitively apply any node glued to our new node. - const auto *Glued = N->getGluedNode(); - if (auto *TGlued = Glued->getGluedNode()) - Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1)); - MachineSDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); @@ -3910,17 +3846,13 @@ static bool IsVMerge(SDNode *N) { // The resulting policy is the effective policy the vmerge would have had, // i.e. whether or not it's passthru operand was implicit-def. bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { - SDValue Passthru, False, True, VL, Mask, Glue; + SDValue Passthru, False, True, VL, Mask; assert(IsVMerge(N)); Passthru = N->getOperand(0); False = N->getOperand(1); True = N->getOperand(2); Mask = N->getOperand(3); VL = N->getOperand(4); - // We always have a glue node for the mask at v0. - Glue = N->getOperand(N->getNumOperands() - 1); - assert(cast(Mask)->getReg() == RISCV::V0); - assert(Glue.getValueType() == MVT::Glue); // If the EEW of True is different from vmerge's SEW, then we can't fold. 
if (True.getSimpleValueType() != N->getSimpleValueType(0)) @@ -3963,12 +3895,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { if (TII->get(TrueOpc).hasUnmodeledSideEffects()) return false; - // The last operand of a masked instruction may be glued. - bool HasGlueOp = True->getGluedNode() != nullptr; - - // The chain operand may exist either before the glued operands or in the last - // position. - unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1; + unsigned TrueChainOpIdx = True.getNumOperands() - 1; bool HasChainOp = True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other; @@ -3980,7 +3907,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { LoopWorklist.push_back(False.getNode()); LoopWorklist.push_back(Mask.getNode()); LoopWorklist.push_back(VL.getNode()); - LoopWorklist.push_back(Glue.getNode()); if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist)) return false; } @@ -3988,7 +3914,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { // The vector policy operand may be present for masked intrinsics bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags); unsigned TrueVLIndex = - True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2; + True.getNumOperands() - HasVecPolicyOp - HasChainOp - 2; SDValue TrueVL = True.getOperand(TrueVLIndex); SDValue SEW = True.getOperand(TrueVLIndex + 1); @@ -4020,7 +3946,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { if (RISCVII::elementsDependOnVL(TrueBaseMCID.TSFlags) && (TrueVL != VL)) return false; if (RISCVII::elementsDependOnMask(TrueBaseMCID.TSFlags) && - (Mask && !usesAllOnesMask(Mask, Glue))) + (Mask && !usesAllOnesMask(Mask))) return false; // Make sure it doesn't raise any observable fp exceptions, since changing the @@ -4077,9 +4003,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { if (HasChainOp) Ops.push_back(True.getOperand(TrueChainOpIdx)); - // Add the glue for 
the CopyToReg of mask->v0. - Ops.push_back(Glue); - MachineSDNode *Result = CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops); Result->setFlags(True->getFlags()); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 268bfe70673a2..46cf27838d1ce 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -3945,7 +3945,7 @@ class VPatUnaryMask(intrinsic_name#"_mask") (result_type result_reg_class:$passthru), (op2_type op2_reg_class:$rs2), - (mask_type V0), + (mask_type VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast( !if(isSEWAware, @@ -3953,7 +3953,7 @@ class VPatUnaryMask; + (mask_type VMV0:$vm), GPR:$vl, log2sew, (XLenVT timm:$policy))>; class VPatUnaryMaskRoundingMode(intrinsic_name#"_mask") (result_type result_reg_class:$passthru), (op2_type op2_reg_class:$rs2), - (mask_type V0), + (mask_type VMV0:$vm), (XLenVT timm:$round), VLOpFrag, (XLenVT timm:$policy))), (!cast( @@ -3978,7 +3978,7 @@ class VPatUnaryMaskRoundingMode; @@ -3996,7 +3996,7 @@ class VPatUnaryMaskRTZ(intrinsic_name#"_mask") (result_type result_reg_class:$passthru), (op2_type op2_reg_class:$rs2), - (mask_type V0), + (mask_type VMV0:$vm), (XLenVT 0b001), VLOpFrag, (XLenVT timm:$policy))), (!cast( @@ -4005,7 +4005,7 @@ class VPatUnaryMaskRTZ; class VPatMaskUnaryNoMask(intrinsic_name#"_mask") (mti.Mask VR:$passthru), (mti.Mask VR:$rs2), - (mti.Mask V0), + (mti.Mask VMV0:$vm), VLOpFrag)), (!cast(inst#"_M_"#mti.BX#"_MASK") (mti.Mask VR:$passthru), (mti.Mask VR:$rs2), - (mti.Mask V0), GPR:$vl, mti.Log2SEW, TU_MU)>; + (mti.Mask VMV0:$vm), GPR:$vl, mti.Log2SEW, TU_MU)>; class VPatUnaryAnyMask(inst#"_MASK") (result_type result_reg_class:$passthru), (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (mask_type V0), GPR:$vl, sew)>; + (mask_type VMV0:$vm), GPR:$vl, sew)>; class VPatBinaryMaskPolicy(inst#"_MASK") (result_type result_reg_class:$passthru), (op1_type 
op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (mask_type V0), GPR:$vl, sew, (XLenVT timm:$policy))>; + (mask_type VMV0:$vm), GPR:$vl, sew, (XLenVT timm:$policy))>; class VPatBinaryMaskPolicyRoundingMode(inst#"_MASK") (result_type result_reg_class:$passthru), (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (mask_type V0), + (mask_type VMV0:$vm), (XLenVT timm:$round), GPR:$vl, sew, (XLenVT timm:$policy))>; @@ -4214,13 +4214,13 @@ class VPatBinaryMaskSwapped(inst#"_MASK") (result_type result_reg_class:$passthru), (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (mask_type V0), GPR:$vl, sew)>; + (mask_type VMV0:$vm), GPR:$vl, sew)>; class VPatTiedBinaryNoMask(inst#"_MASK_TIED") (result_type result_reg_class:$passthru), (op2_type op2_kind:$rs2), - (mask_type V0), GPR:$vl, sew, (XLenVT timm:$policy))>; + (mask_type VMV0:$vm), GPR:$vl, sew, (XLenVT timm:$policy))>; class VPatTiedBinaryMaskRoundingMode(inst#"_MASK_TIED") (result_type result_reg_class:$passthru), (op2_type op2_kind:$rs2), - (mask_type V0), + (mask_type VMV0:$vm), (XLenVT timm:$round), GPR:$vl, sew, (XLenVT timm:$policy))>; @@ -4447,13 +4447,13 @@ class VPatTernaryMaskPolicy(inst#"_"#kind#"_"#vlmul.MX # "_MASK") result_reg_class:$rs3, (op1_type op1_reg_class:$rs1), op2_kind:$rs2, - (mask_type V0), + (mask_type VMV0:$vm), GPR:$vl, sew, (XLenVT timm:$policy))>; class VPatTernaryMaskPolicyRoundingMode(!if(isSEWAware, @@ -4482,7 +4482,7 @@ class VPatTernaryMaskPolicyRoundingMode; @@ -4502,13 +4502,13 @@ class VPatTernaryMaskTU(inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew)# "_MASK") result_reg_class:$rs3, (op1_type op1_reg_class:$rs1), op2_kind:$rs2, - (mask_type V0), + (mask_type VMV0:$vm), GPR:$vl, log2sew, TU_MU)>; class VPatTernaryMaskTURoundingMode(inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew)# "_MASK") result_reg_class:$rs3, (op1_type op1_reg_class:$rs1), op2_kind:$rs2, - (mask_type V0), + (mask_type VMV0:$vm), (XLenVT timm:$round), GPR:$vl, log2sew, TU_MU)>; @@ -4546,9 +4546,9 
@@ multiclass VPatUnaryS_M(inst#"_M_"#mti.BX) $rs1, GPR:$vl, mti.Log2SEW)>; def : Pat<(XLenVT (!cast(intrinsic_name # "_mask") - (mti.Mask VR:$rs1), (mti.Mask V0), VLOpFrag)), + (mti.Mask VR:$rs1), (mti.Mask VMV0:$vm), VLOpFrag)), (!cast(inst#"_M_"#mti.BX#"_MASK") $rs1, - (mti.Mask V0), GPR:$vl, mti.Log2SEW)>; + (mti.Mask VMV0:$vm), GPR:$vl, mti.Log2SEW)>; } } @@ -4636,9 +4636,9 @@ multiclass VPatNullaryV { vti.RegClass:$passthru, GPR:$vl, vti.Log2SEW, TU_MU)>; def : Pat<(vti.Vector (!cast(intrinsic # "_mask") (vti.Vector vti.RegClass:$passthru), - (vti.Mask V0), VLOpFrag, (XLenVT timm:$policy))), + (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast(instruction#"_V_" # vti.LMul.MX # "_MASK") - vti.RegClass:$passthru, (vti.Mask V0), + vti.RegClass:$passthru, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; } } @@ -4736,13 +4736,13 @@ multiclass VPatBinaryCarryInTAIL(inst#"_"#kind#"_"#vlmul.MX) (result_type result_reg_class:$passthru), (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (mask_type V0), GPR:$vl, sew)>; + (mask_type VMV0:$vm), GPR:$vl, sew)>; } multiclass VPatBinaryCarryIn(intrinsic) (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (mask_type V0), + (mask_type VMV0:$vm), VLOpFrag)), (!cast(inst#"_"#kind#"_"#vlmul.MX) (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (mask_type V0), GPR:$vl, sew)>; + (mask_type VMV0:$vm), GPR:$vl, sew)>; } multiclass VPatBinaryMaskOut; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; } } @@ -6174,14 +6174,14 @@ foreach vti = AllIntegerVectors in { def : Pat<(vti.Vector (int_riscv_vrsub_mask (vti.Vector vti.RegClass:$passthru), (vti.Vector vti.RegClass:$rs2), (vti.Vector vti.RegClass:$rs1), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast("PseudoVSUB_VV_"#vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; @@ -6200,14 
+6200,14 @@ foreach vti = AllIntegerVectors in { def : Pat<(vti.Vector (int_riscv_vsub_mask (vti.Vector vti.RegClass:$passthru), (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast("PseudoVADD_VI_"#vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs1, (NegImm simm5_plus1:$rs2), - (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; @@ -6844,14 +6844,14 @@ foreach vti = AllIntegerVectors in { def : Pat<(vti.Vector (int_riscv_vsll_mask (vti.Vector vti.RegClass:$passthru), (vti.Vector vti.RegClass:$rs1), (XLenVT 1), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast("PseudoVADD_VV_"#vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs1, vti.RegClass:$rs1, - (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; @@ -7194,9 +7194,9 @@ foreach fvti = AllFloatVectors in { def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$passthru), (fvti.Vector fvti.RegClass:$rs2), (fvti.Scalar (fpimm0)), - (fvti.Mask V0), VLOpFrag)), + (fvti.Mask VMV0:$vm), VLOpFrag)), (instr fvti.RegClass:$passthru, fvti.RegClass:$rs2, 0, - (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; + (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 8f77b2ce34d1f..629e5ccf2f27d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -928,16 +928,16 @@ foreach vtiToWti = AllWidenableIntVectors in { (!cast("PseudoVWADDU_VV_"#vti.LMul.MX) (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>; - def : Pat<(shl (wti.Vector (riscv_sext_vl_oneuse (vti.Vector vti.RegClass:$rs1), (vti.Mask V0), 
VLOpFrag)), + def : Pat<(shl (wti.Vector (riscv_sext_vl_oneuse (vti.Vector vti.RegClass:$rs1), (vti.Mask VMV0:$vm), VLOpFrag)), (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))), (!cast("PseudoVWADD_VV_"#vti.LMul.MX#"_MASK") (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(shl (wti.Vector (riscv_zext_vl_oneuse (vti.Vector vti.RegClass:$rs1), (vti.Mask V0), VLOpFrag)), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(shl (wti.Vector (riscv_zext_vl_oneuse (vti.Vector vti.RegClass:$rs1), (vti.Mask VMV0:$vm), VLOpFrag)), (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))), (!cast("PseudoVWADDU_VV_"#vti.LMul.MX#"_MASK") (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -1081,24 +1081,24 @@ defm : VPatWidenMulAddSDNode_VX; // 11.15. 
Vector Integer Merge Instructions foreach vti = AllIntegerVectors in { let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(vti.Vector (vselect (vti.Mask V0), vti.RegClass:$rs1, + def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), vti.RegClass:$rs1, vti.RegClass:$rs2)), (!cast("PseudoVMERGE_VVM_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask V0), + vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask VMV0:$vm), vti.AVL, vti.Log2SEW)>; - def : Pat<(vti.Vector (vselect (vti.Mask V0), (SplatPat XLenVT:$rs1), + def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), (SplatPat XLenVT:$rs1), vti.RegClass:$rs2)), (!cast("PseudoVMERGE_VXM_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs2, GPR:$rs1, (vti.Mask V0), vti.AVL, vti.Log2SEW)>; + vti.RegClass:$rs2, GPR:$rs1, (vti.Mask VMV0:$vm), vti.AVL, vti.Log2SEW)>; - def : Pat<(vti.Vector (vselect (vti.Mask V0), (SplatPat_simm5 simm5:$rs1), + def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), (SplatPat_simm5 simm5:$rs1), vti.RegClass:$rs2)), (!cast("PseudoVMERGE_VIM_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), vti.AVL, vti.Log2SEW)>; + vti.RegClass:$rs2, simm5:$rs1, (vti.Mask VMV0:$vm), vti.AVL, vti.Log2SEW)>; } } @@ -1348,39 +1348,39 @@ defm : VPatFPSetCCSDNode_VV_VF_FV; foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in { defvar ivti = GetIntVTypeInfo.Vti; let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(fvti.Vector (vselect (fvti.Mask V0), fvti.RegClass:$rs1, + def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), fvti.RegClass:$rs1, fvti.RegClass:$rs2)), (!cast("PseudoVMERGE_VVM_"#fvti.LMul.MX) (fvti.Vector (IMPLICIT_DEF)), - fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0), + fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>; - def : Pat<(fvti.Vector (vselect (fvti.Mask V0), + def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), (SplatFPOp 
(SelectScalarFPAsInt (XLenVT GPR:$imm))), fvti.RegClass:$rs2)), (!cast("PseudoVMERGE_VXM_"#fvti.LMul.MX) (fvti.Vector (IMPLICIT_DEF)), - fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>; + fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>; - def : Pat<(fvti.Vector (vselect (fvti.Mask V0), + def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), (SplatFPOp (fvti.Scalar fpimm0)), fvti.RegClass:$rs2)), (!cast("PseudoVMERGE_VIM_"#fvti.LMul.MX) (fvti.Vector (IMPLICIT_DEF)), - fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>; + fvti.RegClass:$rs2, 0, (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>; } } foreach fvti = AllFloatVectors in { let Predicates = GetVTypePredicates.Predicates in - def : Pat<(fvti.Vector (vselect (fvti.Mask V0), + def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), (SplatFPOp fvti.ScalarRegClass:$rs1), fvti.RegClass:$rs2)), (!cast("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX) (fvti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs2, (fvti.Scalar fvti.ScalarRegClass:$rs1), - (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>; + (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>; } // 13.17. 
Vector Single-Width Floating-Point/Integer Type-Convert Instructions diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 333ae52534681..9ba0745ce5745 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -628,7 +628,7 @@ class VPatBinaryVL_V( !if(isSEWAware, @@ -637,7 +637,7 @@ class VPatBinaryVL_V; + (mask_type VMV0:$vm), GPR:$vl, log2sew, TAIL_AGNOSTIC)>; class VPatBinaryVL_V_RM( !if(isSEWAware, @@ -665,7 +665,7 @@ class VPatBinaryVL_V_RM(instruction_name#"_"#suffix#"_"# vlmul.MX#"_MASK_TIED") result_reg_class:$rs1, op2_reg_class:$rs2, - (mask_type V0), GPR:$vl, sew, TU_MU)>; + (mask_type VMV0:$vm), GPR:$vl, sew, TU_MU)>; multiclass VPatTiedBinaryNoMaskVL_V_RM( !if(isSEWAware, @@ -797,7 +797,7 @@ class VPatBinaryVL_XI; + (mask_type VMV0:$vm), GPR:$vl, log2sew, TAIL_AGNOSTIC)>; multiclass VPatBinaryVL_VV_VX vtilist = AllIntegerVectors, @@ -889,7 +889,7 @@ class VPatBinaryVL_VF( !if(isSEWAware, @@ -898,7 +898,7 @@ class VPatBinaryVL_VF; + (mask_type VMV0:$vm), GPR:$vl, log2sew, TAIL_AGNOSTIC)>; class VPatBinaryVL_VF_RM( !if(isSEWAware, @@ -924,7 +924,7 @@ class VPatBinaryVL_VF_RM( !if(isSEWAware, @@ -977,7 +977,7 @@ multiclass VPatBinaryFPVL_R_VF; + (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -988,7 +988,7 @@ multiclass VPatBinaryFPVL_R_VF_RM( !if(isSEWAware, @@ -996,7 +996,7 @@ multiclass VPatBinaryFPVL_R_VF_RM(instruction_name#"_VV_"#vti.LMul.MX#"_MASK") VR:$passthru, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; } // Inherits from VPatIntegerSetCCVL_VV and adds a pattern with operands swapped. 
@@ -1025,11 +1025,11 @@ multiclass VPatIntegerSetCCVL_VV_Swappable(instruction_name#"_VV_"#vti.LMul.MX#"_MASK") VR:$passthru, vti.RegClass:$rs1, - vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; } multiclass VPatIntegerSetCCVL_VX_Swappable; + GPR:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat (XLenVT GPR:$rs2)), (vti.Vector vti.RegClass:$rs1), invcc, VR:$passthru, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (instruction_masked VR:$passthru, vti.RegClass:$rs1, - GPR:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + GPR:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; } multiclass VPatIntegerSetCCVL_VI_Swappable; // FIXME: Can do some canonicalization to remove these patterns. def : Pat<(vti.Mask (riscv_setcc_vl (splatpat_kind simm5:$rs2), (vti.Vector vti.RegClass:$rs1), invcc, VR:$passthru, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (instruction_masked VR:$passthru, vti.RegClass:$rs1, - simm5:$rs2, (vti.Mask V0), GPR:$vl, + simm5:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; } @@ -1084,31 +1084,31 @@ multiclass VPatFPSetCCVL_VV_VF_FV(inst_name#"_VV_"#fvti.LMul.MX#"_MASK") VR:$passthru, fvti.RegClass:$rs1, - fvti.RegClass:$rs2, (fvti.Mask V0), + fvti.RegClass:$rs2, (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>; def : Pat<(fvti.Mask (vop (fvti.Vector fvti.RegClass:$rs1), (SplatFPOp fvti.ScalarRegClass:$rs2), cc, VR:$passthru, - (fvti.Mask V0), + (fvti.Mask VMV0:$vm), VLOpFrag)), (!cast(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK") VR:$passthru, fvti.RegClass:$rs1, - fvti.ScalarRegClass:$rs2, (fvti.Mask V0), + fvti.ScalarRegClass:$rs2, (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>; def : Pat<(fvti.Mask (vop (SplatFPOp fvti.ScalarRegClass:$rs2), (fvti.Vector fvti.RegClass:$rs1), cc, VR:$passthru, - (fvti.Mask V0), + (fvti.Mask VMV0:$vm), VLOpFrag)), 
(!cast(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK") VR:$passthru, fvti.RegClass:$rs1, - fvti.ScalarRegClass:$rs2, (fvti.Mask V0), + fvti.ScalarRegClass:$rs2, (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>; } } @@ -1122,11 +1122,11 @@ multiclass VPatExtendVL_V.Predicates, GetVTypePredicates.Predicates) in def : Pat<(vti.Vector (vop (fti.Vector fti.RegClass:$rs2), - (fti.Mask V0), VLOpFrag)), + (fti.Mask VMV0:$vm), VLOpFrag)), (!cast(inst_name#"_"#suffix#"_"#vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), fti.RegClass:$rs2, - (fti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (fti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; } } @@ -1138,11 +1138,11 @@ multiclass VPatConvertFP2IVL_V { let Predicates = !listconcat(GetVTypePredicates.Predicates, GetVTypePredicates.Predicates) in def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1), - (fvti.Mask V0), + (fvti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_"#ivti.LMul.MX#"_MASK") (ivti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, - (fvti.Mask V0), GPR:$vl, ivti.Log2SEW, TA_MA)>; + (fvti.Mask VMV0:$vm), GPR:$vl, ivti.Log2SEW, TA_MA)>; } } @@ -1153,11 +1153,11 @@ multiclass VPatConvertFP2I_RM_VL_V.Predicates, GetVTypePredicates.Predicates) in def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1), - (fvti.Mask V0), (XLenVT timm:$frm), + (fvti.Mask VMV0:$vm), (XLenVT timm:$frm), VLOpFrag)), (!cast(instruction_name#"_"#ivti.LMul.MX#"_MASK") (ivti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, - (fvti.Mask V0), timm:$frm, GPR:$vl, ivti.Log2SEW, + (fvti.Mask VMV0:$vm), timm:$frm, GPR:$vl, ivti.Log2SEW, TA_MA)>; } } @@ -1168,11 +1168,11 @@ multiclass VPatConvertI2FPVL_V_RM.Predicates, GetVTypePredicates.Predicates) in def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1), - (ivti.Mask V0), + (ivti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1, - (ivti.Mask V0), + (ivti.Mask VMV0:$vm), // 
Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1186,11 +1186,11 @@ multiclass VPatConvertI2FP_RM_VL_V { let Predicates = !listconcat(GetVTypePredicates.Predicates, GetVTypePredicates.Predicates) in def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1), - (ivti.Mask V0), (XLenVT timm:$frm), + (ivti.Mask VMV0:$vm), (XLenVT timm:$frm), VLOpFrag)), (!cast(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1, - (ivti.Mask V0), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>; + (ivti.Mask VMV0:$vm), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>; } } @@ -1203,11 +1203,11 @@ multiclass VPatWConvertFP2IVL_V let Predicates = !listconcat(GetVTypePredicates.Predicates, GetVTypePredicates.Predicates) in def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1), - (fvti.Mask V0), + (fvti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_"#fvti.LMul.MX#"_MASK") (iwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, - (fvti.Mask V0), GPR:$vl, fvti.Log2SEW, TA_MA)>; + (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TA_MA)>; } } @@ -1219,11 +1219,11 @@ multiclass VPatWConvertFP2I_RM_VL_V { let Predicates = !listconcat(GetVTypePredicates.Predicates, GetVTypePredicates.Predicates) in def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1), - (fvti.Mask V0), (XLenVT timm:$frm), + (fvti.Mask VMV0:$vm), (XLenVT timm:$frm), VLOpFrag)), (!cast(instruction_name#"_"#fvti.LMul.MX#"_MASK") (iwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, - (fvti.Mask V0), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>; + (fvti.Mask VMV0:$vm), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>; } } @@ -1235,11 +1235,11 @@ multiclass VPatWConvertI2FPVL_V.Predicates, GetVTypePredicates.Predicates) in def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1), - (ivti.Mask V0), + (ivti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_"#ivti.LMul.MX#"_E"#ivti.SEW#"_MASK") (fwti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1, - 
(ivti.Mask V0), + (ivti.Mask VMV0:$vm), GPR:$vl, ivti.Log2SEW, TA_MA)>; } } @@ -1256,11 +1256,11 @@ multiclass VPatNConvertFP2IVL_W.Predicates, GetVTypePredicates.Predicates) in def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1), - (fwti.Mask V0), + (fwti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_"#vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, - (fwti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (fwti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; } } @@ -1271,11 +1271,11 @@ multiclass VPatNConvertFP2I_RM_VL_W { let Predicates = !listconcat(GetVTypePredicates.Predicates, GetVTypePredicates.Predicates) in def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1), - (fwti.Mask V0), (XLenVT timm:$frm), + (fwti.Mask VMV0:$vm), (XLenVT timm:$frm), VLOpFrag)), (!cast(instruction_name#"_"#vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, - (fwti.Mask V0), timm:$frm, GPR:$vl, vti.Log2SEW, TA_MA)>; + (fwti.Mask VMV0:$vm), timm:$frm, GPR:$vl, vti.Log2SEW, TA_MA)>; } } @@ -1287,11 +1287,11 @@ multiclass VPatNConvertI2FPVL_W_RM.Predicates, GetVTypePredicates.Predicates) in def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1), - (iwti.Mask V0), + (iwti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), iwti.RegClass:$rs1, - (iwti.Mask V0), + (iwti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1306,11 +1306,11 @@ multiclass VPatNConvertI2FP_RM_VL_W { let Predicates = !listconcat(GetVTypePredicates.Predicates, GetVTypePredicates.Predicates) in def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1), - (iwti.Mask V0), (XLenVT timm:$frm), + (iwti.Mask VMV0:$vm), (XLenVT timm:$frm), VLOpFrag)), (!cast(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), iwti.RegClass:$rs1, - (iwti.Mask V0), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>; + (iwti.Mask 
VMV0:$vm), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>; } } @@ -1320,13 +1320,13 @@ multiclass VPatReductionVL { let Predicates = GetVTypePredicates.Predicates in { def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$passthru), (vti.Vector vti.RegClass:$rs1), VR:$rs2, - (vti.Mask V0), VLOpFrag, + (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK") (vti_m1.Vector VR:$passthru), (vti.Vector vti.RegClass:$rs1), (vti_m1.Vector VR:$rs2), - (vti.Mask V0), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; } } } @@ -1337,13 +1337,13 @@ multiclass VPatReductionVL_RM let Predicates = GetVTypePredicates.Predicates in { def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$passthru), (vti.Vector vti.RegClass:$rs1), VR:$rs2, - (vti.Mask V0), VLOpFrag, + (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK") (vti_m1.Vector VR:$passthru), (vti.Vector vti.RegClass:$rs1), (vti_m1.Vector VR:$rs2), - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1402,11 +1402,11 @@ multiclass VPatWidenReductionVL.Predicates) in { def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$passthru), (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))), - VR:$rs2, (vti.Mask V0), VLOpFrag, + VR:$rs2, (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK") (wti_m1.Vector VR:$passthru), (vti.Vector vti.RegClass:$rs1), - (wti_m1.Vector VR:$rs2), (vti.Mask V0), GPR:$vl, vti.Log2SEW, + (wti_m1.Vector VR:$rs2), (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; } } @@ -1421,11 +1421,11 @@ multiclass VPatWidenReductionVL_Ext_VL.Predicates) in { def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$passthru), (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), 
(XLenVT srcvalue))), - VR:$rs2, (vti.Mask V0), VLOpFrag, + VR:$rs2, (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK") (wti_m1.Vector VR:$passthru), (vti.Vector vti.RegClass:$rs1), - (wti_m1.Vector VR:$rs2), (vti.Mask V0), GPR:$vl, vti.Log2SEW, + (wti_m1.Vector VR:$rs2), (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; } } @@ -1440,11 +1440,11 @@ multiclass VPatWidenReductionVL_Ext_VL_RM.Predicates) in { def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$passthru), (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), (XLenVT srcvalue))), - VR:$rs2, (vti.Mask V0), VLOpFrag, + VR:$rs2, (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK") (wti_m1.Vector VR:$passthru), (vti.Vector vti.RegClass:$rs1), - (wti_m1.Vector VR:$rs2), (vti.Mask V0), + (wti_m1.Vector VR:$rs2), (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1570,10 +1570,10 @@ multiclass VPatNarrowShiftExtVL_WV(instruction_name#"_WV_"#vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; } } @@ -1616,7 +1616,7 @@ multiclass VPatMultiplyAccVL_VV_VX { foreach vti = AllIntegerVectors in { defvar suffix = vti.LMul.MX; let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (op vti.RegClass:$rd, (riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2, srcvalue, (vti.Mask true_mask), VLOpFrag), @@ -1624,8 +1624,8 @@ multiclass VPatMultiplyAccVL_VV_VX { vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), (!cast(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; - 
def : Pat<(riscv_vmerge_vl (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TU_MU)>; + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (op vti.RegClass:$rd, (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rs2, srcvalue, (vti.Mask true_mask), VLOpFrag), @@ -1633,8 +1633,8 @@ multiclass VPatMultiplyAccVL_VV_VX { vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), (!cast(instruction_name#"_VX_"# suffix #"_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TU_MU)>; + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (op vti.RegClass:$rd, (riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2, srcvalue, (vti.Mask true_mask), VLOpFrag), @@ -1642,8 +1642,8 @@ multiclass VPatMultiplyAccVL_VV_VX { vti.RegClass:$rd, undef, VLOpFrag), (!cast(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (op vti.RegClass:$rd, (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rs2, srcvalue, (vti.Mask true_mask), VLOpFrag), @@ -1651,7 +1651,7 @@ multiclass VPatMultiplyAccVL_VV_VX { vti.RegClass:$rd, undef, VLOpFrag), (!cast(instruction_name#"_VX_"# suffix #"_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } } @@ -1665,17 +1665,17 @@ multiclass VPatWidenMultiplyAddVL_VV_VX { def : Pat<(vwmacc_op (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$rs2), (wti.Vector wti.RegClass:$rd), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), 
(!cast(instr_name#"_VV_"#vti.LMul.MX#"_MASK") wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(vwmacc_op (SplatPat XLenVT:$rs1), (vti.Vector vti.RegClass:$rs2), (wti.Vector wti.RegClass:$rd), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast(instr_name#"_VX_"#vti.LMul.MX#"_MASK") wti.RegClass:$rd, vti.ScalarRegClass:$rs1, - vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, + vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -1708,19 +1708,19 @@ multiclass VPatFPMulAddVL_VV_VF defvar suffix = vti.LMul.MX; let Predicates = GetVTypePredicates.Predicates in { def : Pat<(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rd, - vti.RegClass:$rs2, (vti.Mask V0), + vti.RegClass:$rs2, (vti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; def : Pat<(vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rd, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; } } } @@ -1730,11 +1730,11 @@ multiclass VPatFPMulAddVL_VV_VF_RM.Predicates in { def : Pat<(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rd, - vti.RegClass:$rs2, (vti.Mask V0), + vti.RegClass:$rs2, (vti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1742,11 +1742,11 @@ 
multiclass VPatFPMulAddVL_VV_VF_RM(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1759,34 +1759,34 @@ multiclass VPatFPMulAccVL_VV_VF { foreach vti = AllFloatVectors in { defvar suffix = vti.LMul.MX; let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), (!cast(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TU_MU)>; + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), (!cast(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TU_MU)>; + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), vti.RegClass:$rd, undef, VLOpFrag), (!cast(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (vop 
(SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), vti.RegClass:$rd, undef, VLOpFrag), (!cast(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } } @@ -1795,46 +1795,46 @@ multiclass VPatFPMulAccVL_VV_VF_RM { foreach vti = AllFloatVectors in { defvar suffix = vti.LMul.MX # "_E" # vti.SEW; let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), (!cast(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), (!cast(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), vti.RegClass:$rd, undef, VLOpFrag), (!cast(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, 
vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), vti.RegClass:$rd, undef, VLOpFrag), (!cast(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1851,18 +1851,18 @@ multiclass VPatWidenFPMulAccVL_VV_VF { GetVTypePredicates.Predicates) in { def : Pat<(vop (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$rs2), - (wti.Vector wti.RegClass:$rd), (vti.Mask V0), + (wti.Vector wti.RegClass:$rd), (vti.Mask VMV0:$vm), VLOpFrag), (!cast(instruction_name#"_VV_"#vti.LMul.MX #"_MASK") wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; def : Pat<(vop (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), (vti.Vector vti.RegClass:$rs2), - (wti.Vector wti.RegClass:$rd), (vti.Mask V0), + (wti.Vector wti.RegClass:$rd), (vti.Mask VMV0:$vm), VLOpFrag), (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX #"_MASK") wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; } } } @@ -1881,22 +1881,22 @@ multiclass VPatWidenFPMulAccVL_VV_VF_RM(instruction_name#"_VV_"#suffix#"_MASK") wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, GPR:$vl, vti.Log2SEW, TA_MA)>; def : Pat<(vop (vti.Vector 
(SplatFPOp vti.ScalarRegClass:$rs1)), (vti.Vector vti.RegClass:$rs2), - (wti.Vector wti.RegClass:$rd), (vti.Mask V0), + (wti.Vector wti.RegClass:$rd), (vti.Mask VMV0:$vm), VLOpFrag), (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#suffix#"_MASK") wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1911,20 +1911,20 @@ multiclass VPatSlideVL_VX_VI { let Predicates = GetVTypePredicates.Predicates in { def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rd), (vti.Vector vti.RegClass:$rs1), - uimm5:$rs2, (vti.Mask V0), + uimm5:$rs2, (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast(instruction_name#"_VI_"#vti.LMul.MX#"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, uimm5:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rd), (vti.Vector vti.RegClass:$rs1), - GPR:$rs2, (vti.Mask V0), + GPR:$rs2, (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast(instruction_name#"_VX_"#vti.LMul.MX#"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; } } @@ -1935,10 +1935,10 @@ multiclass VPatSlide1VL_VX { let Predicates = GetVTypePredicates.Predicates in { def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rs3), (vti.Vector vti.RegClass:$rs1), - GPR:$rs2, (vti.Mask V0), VLOpFrag)), + GPR:$rs2, (vti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_VX_"#vti.LMul.MX#"_MASK") vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TU_MU)>; } } } @@ -1948,10 +1948,10 @@ multiclass VPatSlide1VL_VF { let Predicates = GetVTypePredicates.Predicates in { def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rs3), 
(vti.Vector vti.RegClass:$rs1), - vti.Scalar:$rs2, (vti.Mask V0), VLOpFrag)), + vti.Scalar:$rs2, (vti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_MASK") vti.RegClass:$rs3, vti.RegClass:$rs1, vti.Scalar:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TU_MU)>; } } } @@ -1961,16 +1961,16 @@ multiclass VPatAVGADDVL_VV_VX_RM { let Predicates = GetVTypePredicates.Predicates in { def : Pat<(vop (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$rs2), - vti.RegClass:$passthru, (vti.Mask V0), VLOpFrag), + vti.RegClass:$passthru, (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVAADD"#suffix#"_VV_"#vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(vop (vti.Vector vti.RegClass:$rs1), (vti.Vector (SplatPat (XLenVT GPR:$rs2))), - vti.RegClass:$passthru, (vti.Mask V0), VLOpFrag), + vti.RegClass:$passthru, (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVAADD"#suffix#"_VX_"#vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs1, GPR:$rs2, - (vti.Mask V0), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } } @@ -1990,16 +1990,16 @@ foreach vti = AllIntegerVectors in { let Predicates = GetVTypePredicates.Predicates in { def : Pat<(riscv_sub_vl (vti.Vector (SplatPat (XLenVT GPR:$rs2))), (vti.Vector vti.RegClass:$rs1), - vti.RegClass:$passthru, (vti.Mask V0), VLOpFrag), + vti.RegClass:$passthru, (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVRSUB_VX_"# vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs1, GPR:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)), (vti.Vector vti.RegClass:$rs1), - 
vti.RegClass:$passthru, (vti.Mask V0), VLOpFrag), + vti.RegClass:$passthru, (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVRSUB_VI_"# vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs1, simm5:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -2017,22 +2017,22 @@ foreach vtiToWti = AllWidenableIntVectors in { GetVTypePredicates.Predicates) in { def : Pat<(riscv_shl_vl (wti.Vector (riscv_sext_vl_oneuse (vti.Vector vti.RegClass:$rs1), - (vti.Mask V0), VLOpFrag)), + (vti.Mask VMV0:$vm), VLOpFrag)), (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, VLOpFrag)), - wti.RegClass:$passthru, (vti.Mask V0), VLOpFrag), + wti.RegClass:$passthru, (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWADD_VV_"#vti.LMul.MX#"_MASK") wti.RegClass:$passthru, vti.RegClass:$rs1, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_shl_vl (wti.Vector (riscv_zext_vl_oneuse (vti.Vector vti.RegClass:$rs1), - (vti.Mask V0), VLOpFrag)), + (vti.Mask VMV0:$vm), VLOpFrag)), (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, VLOpFrag)), - wti.RegClass:$passthru, (vti.Mask V0), VLOpFrag), + wti.RegClass:$passthru, (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWADDU_VV_"#vti.LMul.MX#"_MASK") wti.RegClass:$passthru, vti.RegClass:$rs1, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -2091,11 +2091,11 @@ foreach vtiTowti = AllWidenableIntVectors in { let Predicates = !listconcat(GetVTypePredicates.Predicates, GetVTypePredicates.Predicates) in def : Pat<(vti.Vector (riscv_trunc_vector_vl (wti.Vector wti.RegClass:$rs1), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVNSRL_WI_"#vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + 
(vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; } // 11.8. Vector Integer Comparison Instructions @@ -2182,41 +2182,41 @@ foreach vtiTowti = AllWidenableIntVectors in { def : Pat<(riscv_vwmaccsu_vl (vti.Vector vti.RegClass:$rs1), (SplatPat XLenVT:$rs2), (wti.Vector wti.RegClass:$rd), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWMACCUS_VX_"#vti.LMul.MX#"_MASK") wti.RegClass:$rd, vti.ScalarRegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } // 11.15. Vector Integer Merge Instructions foreach vti = AllIntegerVectors in { let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask V0), + def : Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask VMV0:$vm), vti.RegClass:$rs1, vti.RegClass:$rs2, vti.RegClass:$passthru, VLOpFrag)), (!cast("PseudoVMERGE_VVM_"#vti.LMul.MX) vti.RegClass:$passthru, vti.RegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask V0), + def : Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask VMV0:$vm), (SplatPat XLenVT:$rs1), vti.RegClass:$rs2, vti.RegClass:$passthru, VLOpFrag)), (!cast("PseudoVMERGE_VXM_"#vti.LMul.MX) vti.RegClass:$passthru, vti.RegClass:$rs2, GPR:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask V0), + def : Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask VMV0:$vm), (SplatPat_simm5 simm5:$rs1), vti.RegClass:$rs2, vti.RegClass:$passthru, VLOpFrag)), (!cast("PseudoVMERGE_VIM_"#vti.LMul.MX) vti.RegClass:$passthru, vti.RegClass:$rs2, simm5:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; } } @@ -2266,17 +2266,17 @@ foreach vtiTowti = AllWidenableIntVectors in { GetVTypePredicates.Predicates) in { // Rounding mode here is 
arbitrary since we aren't shifting out any bits. def : Pat<(vti.Vector (riscv_trunc_vector_vl_ssat (wti.Vector wti.RegClass:$rs1), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVNCLIP_WI_"#vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0, - (vti.Mask V0), /*RNU*/0, GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), /*RNU*/0, GPR:$vl, vti.Log2SEW, TA_MA)>; def : Pat<(vti.Vector (riscv_trunc_vector_vl_usat (wti.Vector wti.RegClass:$rs1), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVNCLIPU_WI_"#vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0, - (vti.Mask V0), /*RNU*/0, GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), /*RNU*/0, GPR:$vl, vti.Log2SEW, TA_MA)>; } } @@ -2344,39 +2344,39 @@ defm : VPatFPSetCCVL_VV_VF_FV.Predicates in { // 13.8. Vector Floating-Point Square-Root Instruction - def : Pat<(any_riscv_fsqrt_vl (vti.Vector vti.RegClass:$rs2), (vti.Mask V0), + def : Pat<(any_riscv_fsqrt_vl (vti.Vector vti.RegClass:$rs2), (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVFSQRT_V_"# vti.LMul.MX # "_E" # vti.SEW # "_MASK") (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, GPR:$vl, vti.Log2SEW, TA_MA)>; // 13.12. Vector Floating-Point Sign-Injection Instructions - def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs), (vti.Mask V0), + def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVFSGNJX_VV_"# vti.LMul.MX #"_E"#vti.SEW#"_MASK") (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs, - vti.RegClass:$rs, (vti.Mask V0), GPR:$vl, vti.Log2SEW, + vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; // Handle fneg with VFSGNJN using the same input for both operands. 
- def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask V0), + def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVFSGNJN_VV_"# vti.LMul.MX#"_E"#vti.SEW #"_MASK") (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs, - vti.RegClass:$rs, (vti.Mask V0), GPR:$vl, vti.Log2SEW, + vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$rs2), vti.RegClass:$passthru, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVFSGNJ_VV_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs1, - vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, + vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1), @@ -2393,26 +2393,26 @@ foreach vti = AllFloatVectors in { def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1), (SplatFPOp vti.ScalarRegClass:$rs2), vti.RegClass:$passthru, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs1, - vti.ScalarRegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, + vti.ScalarRegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; // Rounding without exception to implement nearbyint. def : Pat<(any_riscv_vfround_noexcept_vl (vti.Vector vti.RegClass:$rs1), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVFROUND_NOEXCEPT_V_" # vti.LMul.MX #"_MASK") (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; // 14.14. 
Vector Floating-Point Classify Instruction def : Pat<(riscv_fclass_vl (vti.Vector vti.RegClass:$rs2), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVFCLASS_V_"# vti.LMul.MX #"_MASK") (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; } } @@ -2422,39 +2422,39 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in { // 13.15. Vector Floating-Point Merge Instruction defvar ivti = GetIntVTypeInfo.Vti; let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0), + def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm), fvti.RegClass:$rs1, fvti.RegClass:$rs2, fvti.RegClass:$passthru, VLOpFrag)), (!cast("PseudoVMERGE_VVM_"#fvti.LMul.MX) - fvti.RegClass:$passthru, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0), + fvti.RegClass:$passthru, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>; - def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0), + def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm), (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))), fvti.RegClass:$rs2, fvti.RegClass:$passthru, VLOpFrag)), (!cast("PseudoVMERGE_VXM_"#fvti.LMul.MX) - fvti.RegClass:$passthru, fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask V0), + fvti.RegClass:$passthru, fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>; - def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0), + def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm), (SplatFPOp (fvti.Scalar fpimm0)), fvti.RegClass:$rs2, fvti.RegClass:$passthru, VLOpFrag)), (!cast("PseudoVMERGE_VIM_"#fvti.LMul.MX) - fvti.RegClass:$passthru, fvti.RegClass:$rs2, 0, (fvti.Mask V0), + fvti.RegClass:$passthru, fvti.RegClass:$rs2, 0, (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>; } } foreach fvti = AllFloatVectors in { let Predicates = GetVTypePredicates.Predicates in { - def : 
Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0), + def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm), (SplatFPOp fvti.ScalarRegClass:$rs1), fvti.RegClass:$rs2, fvti.RegClass:$passthru, @@ -2462,7 +2462,7 @@ foreach fvti = AllFloatVectors in { (!cast("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX) fvti.RegClass:$passthru, fvti.RegClass:$rs2, (fvti.Scalar fvti.ScalarRegClass:$rs1), - (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; + (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>; } } @@ -2524,11 +2524,11 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { GetVTypePredicates.Predicates)) in def : Pat<(fwti.Vector (any_riscv_fpextend_vl (fvti.Vector fvti.RegClass:$rs1), - (fvti.Mask V0), + (fvti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, - (fvti.Mask V0), + (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TA_MA)>; } @@ -2538,11 +2538,11 @@ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in { let Predicates = [HasVInstructionsBF16Minimal] in def : Pat<(fwti.Vector (any_riscv_fpextend_vl (fvti.Vector fvti.RegClass:$rs1), - (fvti.Mask V0), + (fvti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVFWCVTBF16_F_F_V_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, - (fvti.Mask V0), + (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TA_MA)>; } @@ -2568,10 +2568,10 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { GetVTypePredicates.Predicates)) in { def : Pat<(fvti.Vector (any_riscv_fpround_vl (fwti.Vector fwti.RegClass:$rs1), - (fwti.Mask V0), VLOpFrag)), + (fwti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, - (fwti.Mask V0), + (fwti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -2581,10 +2581,10 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { GetVTypePredicates.Predicates) 
in def : Pat<(fvti.Vector (any_riscv_fncvt_rod_vl (fwti.Vector fwti.RegClass:$rs1), - (fwti.Mask V0), VLOpFrag)), + (fwti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, - (fwti.Mask V0), GPR:$vl, fvti.Log2SEW, TA_MA)>; + (fwti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TA_MA)>; } } @@ -2594,10 +2594,10 @@ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in { let Predicates = [HasVInstructionsBF16Minimal] in def : Pat<(fvti.Vector (any_riscv_fpround_vl (fwti.Vector fwti.RegClass:$rs1), - (fwti.Mask V0), VLOpFrag)), + (fwti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVFNCVTBF16_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, - (fwti.Mask V0), + (fwti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -2700,20 +2700,20 @@ foreach mti = AllMasks in { VLOpFrag)), (!cast("PseudoVCPOP_M_" # mti.BX) VR:$rs2, GPR:$vl, mti.Log2SEW)>; - def : Pat<(XLenVT (riscv_vcpop_vl (mti.Mask VR:$rs2), (mti.Mask V0), + def : Pat<(XLenVT (riscv_vcpop_vl (mti.Mask VR:$rs2), (mti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVCPOP_M_" # mti.BX # "_MASK") - VR:$rs2, (mti.Mask V0), GPR:$vl, mti.Log2SEW)>; + VR:$rs2, (mti.Mask VMV0:$vm), GPR:$vl, mti.Log2SEW)>; // 15.3 vfirst find-first-set mask bit def : Pat<(XLenVT (riscv_vfirst_vl (mti.Mask VR:$rs2), (mti.Mask true_mask), VLOpFrag)), (!cast("PseudoVFIRST_M_" # mti.BX) VR:$rs2, GPR:$vl, mti.Log2SEW)>; - def : Pat<(XLenVT (riscv_vfirst_vl (mti.Mask VR:$rs2), (mti.Mask V0), + def : Pat<(XLenVT (riscv_vfirst_vl (mti.Mask VR:$rs2), (mti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVFIRST_M_" # mti.BX # "_MASK") - VR:$rs2, (mti.Mask V0), GPR:$vl, mti.Log2SEW)>; + VR:$rs2, (mti.Mask VMV0:$vm), GPR:$vl, mti.Log2SEW)>; } } @@ -2736,26 +2736,26 @@ foreach vti = AllIntegerVectors in { def : Pat<(vti.Vector (riscv_vrgather_vv_vl vti.RegClass:$rs2, vti.RegClass:$rs1, 
vti.RegClass:$passthru, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVRGATHER_VV_"# vti.LMul.MX#"_E"# vti.SEW#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1, vti.RegClass:$passthru, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVRGATHER_VX_"# vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs2, GPR:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, uimm5:$imm, vti.RegClass:$passthru, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVRGATHER_VI_"# vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs2, uimm5:$imm, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } // emul = lmul * 16 / sew @@ -2771,11 +2771,11 @@ foreach vti = AllIntegerVectors in { (riscv_vrgatherei16_vv_vl vti.RegClass:$rs2, (ivti.Vector ivti.RegClass:$rs1), vti.RegClass:$passthru, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast(inst#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs2, ivti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -2806,27 +2806,27 @@ foreach vti = !listconcat(AllFloatVectors, AllBFloatVectors) in { (riscv_vrgather_vv_vl vti.RegClass:$rs2, (ivti.Vector vti.RegClass:$rs1), vti.RegClass:$passthru, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVRGATHER_VV_"# vti.LMul.MX#"_E"# vti.SEW#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; 
def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1, vti.RegClass:$passthru, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVRGATHER_VX_"# vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs2, GPR:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, uimm5:$imm, vti.RegClass:$passthru, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVRGATHER_VI_"# vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs2, uimm5:$imm, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } defvar vlmul = vti.LMul; @@ -2842,11 +2842,11 @@ foreach vti = !listconcat(AllFloatVectors, AllBFloatVectors) in { (riscv_vrgatherei16_vv_vl vti.RegClass:$rs2, (ivti.Vector ivti.RegClass:$rs1), vti.RegClass:$passthru, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast(inst#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs2, ivti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -2883,10 +2883,10 @@ def riscv_fslide1down_vl : SDNode<"RISCVISD::VFSLIDE1DOWN_VL", SDTRVVFSlide1, [ foreach vti = AllIntegerVectors in { let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(vti.Vector (riscv_vid_vl (vti.Mask V0), + def : Pat<(vti.Vector (riscv_vid_vl (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVID_V_"#vti.LMul.MX#"_MASK") - (vti.Vector (IMPLICIT_DEF)), (vti.Mask V0), GPR:$vl, vti.Log2SEW, + (vti.Vector (IMPLICIT_DEF)), (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index 430d75e5cec5b..470555769d493 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -716,12 
+716,12 @@ multiclass VPatUnaryVL_V.Predicates) in { def : Pat<(vti.Vector (op (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$passthru), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_V_"#vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs1, - (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; @@ -736,17 +736,17 @@ foreach vti = AllIntegerVectors in { (vti.Vector vti.RegClass:$rs1), (riscv_splat_vector -1), (vti.Vector vti.RegClass:$passthru), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag), (vti.Vector vti.RegClass:$rs2), (vti.Vector vti.RegClass:$passthru), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVANDN_VV_"#vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; @@ -755,13 +755,13 @@ foreach vti = AllIntegerVectors in { (not vti.ScalarRegClass:$rs1)), (vti.Vector vti.RegClass:$rs2), (vti.Vector vti.RegClass:$passthru), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVANDN_VX_"#vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs2, vti.ScalarRegClass:$rs1, - (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; @@ -769,13 +769,13 @@ foreach vti = AllIntegerVectors in { def : Pat<(vti.Vector (riscv_and_vl (riscv_splat_vector invLogicImm:$rs1), (vti.Vector vti.RegClass:$rs2), (vti.Vector vti.RegClass:$passthru), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVANDN_VX_"#vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs2, invLogicImm:$rs1, - (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; @@ -797,12 +797,12 @@ foreach vti = AllIntegerVectors in { def : Pat<(riscv_rotl_vl vti.RegClass:$rs2, (vti.Vector (SplatPat_uimm6 uimm6:$rs1)), (vti.Vector vti.RegClass:$passthru), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), 
(!cast("PseudoVROR_VI_"#vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs2, (!cast("InvRot" # vti.SEW # "Imm") uimm6:$rs1), - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } defm : VPatBinaryVL_VV_VX_VI; @@ -817,90 +817,90 @@ foreach vtiToWti = AllWidenableIntVectors in { (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))), (wti.Vector (ext_oneuse (vti.Vector vti.RegClass:$rs1))), (wti.Vector wti.RegClass:$passthru), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWSLL_VV_"#vti.LMul.MX#"_MASK") wti.RegClass:$passthru, vti.RegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_shl_vl (wti.Vector (riscv_zext_vl_oneuse (vti.Vector vti.RegClass:$rs2), - (vti.Mask V0), VLOpFrag)), + (vti.Mask VMV0:$vm), VLOpFrag)), (wti.Vector (riscv_ext_vl_oneuse (vti.Vector vti.RegClass:$rs1), - (vti.Mask V0), VLOpFrag)), + (vti.Mask VMV0:$vm), VLOpFrag)), (wti.Vector wti.RegClass:$passthru), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWSLL_VV_"#vti.LMul.MX#"_MASK") wti.RegClass:$passthru, vti.RegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_shl_vl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))), (wti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1))), (wti.Vector wti.RegClass:$passthru), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWSLL_VX_"#vti.LMul.MX#"_MASK") wti.RegClass:$passthru, vti.RegClass:$rs2, GPR:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_shl_vl (wti.Vector (riscv_zext_vl_oneuse (vti.Vector vti.RegClass:$rs2), - (vti.Mask V0), VLOpFrag)), + (vti.Mask VMV0:$vm), 
VLOpFrag)), (wti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1))), (wti.Vector wti.RegClass:$passthru), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWSLL_VX_"#vti.LMul.MX#"_MASK") wti.RegClass:$passthru, vti.RegClass:$rs2, GPR:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_shl_vl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))), (wti.Vector (SplatPat_uimm5 uimm5:$rs1)), (wti.Vector wti.RegClass:$passthru), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWSLL_VI_"#vti.LMul.MX#"_MASK") wti.RegClass:$passthru, vti.RegClass:$rs2, uimm5:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_shl_vl (wti.Vector (riscv_zext_vl_oneuse (vti.Vector vti.RegClass:$rs2), - (vti.Mask V0), VLOpFrag)), + (vti.Mask VMV0:$vm), VLOpFrag)), (wti.Vector (SplatPat_uimm5 uimm5:$rs1)), (wti.Vector wti.RegClass:$passthru), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWSLL_VI_"#vti.LMul.MX#"_MASK") wti.RegClass:$passthru, vti.RegClass:$rs2, uimm5:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_vwsll_vl (vti.Vector vti.RegClass:$rs2), (vti.Vector vti.RegClass:$rs1), (wti.Vector wti.RegClass:$passthru), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWSLL_VV_"#vti.LMul.MX#"_MASK") wti.RegClass:$passthru, vti.RegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_vwsll_vl (vti.Vector vti.RegClass:$rs2), (vti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1))), (wti.Vector wti.RegClass:$passthru), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), 
(!cast("PseudoVWSLL_VX_"#vti.LMul.MX#"_MASK") wti.RegClass:$passthru, vti.RegClass:$rs2, GPR:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_vwsll_vl (vti.Vector vti.RegClass:$rs2), (vti.Vector (SplatPat_uimm5 uimm5:$rs1)), (wti.Vector wti.RegClass:$passthru), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWSLL_VI_"#vti.LMul.MX#"_MASK") wti.RegClass:$passthru, vti.RegClass:$rs2, uimm5:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -1046,12 +1046,12 @@ multiclass VPatBinaryV_VI_VROL; } } diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 4a69bdeb76161..b2b77584c414d 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -137,6 +137,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { initializeRISCVExpandPseudoPass(*PR); initializeRISCVVectorPeepholePass(*PR); initializeRISCVVLOptimizerPass(*PR); + initializeRISCVVMV0EliminationPass(*PR); initializeRISCVInsertVSETVLIPass(*PR); initializeRISCVInsertReadWriteCSRPass(*PR); initializeRISCVInsertWriteVXRMPass(*PR); @@ -612,6 +613,8 @@ void RISCVPassConfig::addPreRegAlloc() { if (TM->getOptLevel() != CodeGenOptLevel::None && EnableMachinePipeliner) addPass(&MachinePipelinerID); + + addPass(createRISCVVMV0EliminationPass()); } void RISCVPassConfig::addFastRegAlloc() { diff --git a/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp b/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp new file mode 100644 index 0000000000000..24a75c1a1592f --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp @@ -0,0 +1,154 @@ +//===- RISCVVMV0Elimination.cpp - VMV0 Elimination -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// Mask operands in vector pseudos have to be in v0. We select them as a virtual +// register in the singleton vmv0 register class instead of copying them to $v0 +// straight away, to make optimizing masks easier. +// +// However the register allocator struggles with singleton register classes and +// will run into errors like "ran out of registers during register allocation in +// function" +// +// This pass runs just before register allocation and replaces any uses* of vmv0 +// with copies to $v0. +// +// %x:vrnov0 = PseudoVADD_VV_M1_MASK %0:vrnov0, %1:vr, %2:vr, %3:vmv0, ... +// -> +// $v0 = COPY %3:vr +// %x:vrnov0 = PseudoVADD_VV_M1_MASK %0:vrnov0, %1:vr, %2:vr, $0, ... +// +// * The only uses of vmv0 left behind are when used for inline asm with the vm +// constraint. +// +//===---------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVSubtarget.h" +#ifndef NDEBUG +#include "llvm/ADT/PostOrderIterator.h" +#endif +#include "llvm/CodeGen/MachineFunctionPass.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-vmv0-elimination" + +namespace { + +class RISCVVMV0Elimination : public MachineFunctionPass { +public: + static char ID; + RISCVVMV0Elimination() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + MachineFunctionProperties getRequiredProperties() const override { + // TODO: We could move this closer to regalloc, out of SSA, which would + // allow scheduling past mask operands. We would need to preserve live + // intervals. 
+ return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } +}; + +} // namespace + +char RISCVVMV0Elimination::ID = 0; + +INITIALIZE_PASS(RISCVVMV0Elimination, DEBUG_TYPE, "RISC-V VMV0 Elimination", + false, false) + +FunctionPass *llvm::createRISCVVMV0EliminationPass() { + return new RISCVVMV0Elimination(); +} + +bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + // Skip if the vector extension is not enabled. + const RISCVSubtarget *ST = &MF.getSubtarget(); + if (!ST->hasVInstructions()) + return false; + + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + const TargetInstrInfo *TII = ST->getInstrInfo(); + + auto IsVMV0 = [](const MCOperandInfo &MCOI) { + return MCOI.RegClass == RISCV::VMV0RegClassID; + }; + +#ifndef NDEBUG + // Assert that we won't clobber any existing reads of V0 where we need to + // insert copies. + ReversePostOrderTraversal RPOT(&*MF.begin()); + SmallPtrSet V0ClobberedOnEntry; + for (MachineBasicBlock *MBB : RPOT) { + bool V0Clobbered = V0ClobberedOnEntry.contains(MBB); + for (MachineInstr &MI : *MBB) { + assert(!(MI.readsRegister(RISCV::V0, TRI) && V0Clobbered)); + if (MI.modifiesRegister(RISCV::V0, TRI)) + V0Clobbered = false; + + if (any_of(MI.getDesc().operands(), IsVMV0)) + V0Clobbered = true; + } + + if (V0Clobbered) + for (MachineBasicBlock *Succ : MBB->successors()) + V0ClobberedOnEntry.insert(Succ); + } +#endif + + bool MadeChange = false; + + // For any instruction with a vmv0 operand, replace it with a copy to v0. + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + // An instruction should only have one or zero vmv0 operands. 
+ assert(count_if(MI.getDesc().operands(), IsVMV0) < 2); + + for (auto [OpNo, MCOI] : enumerate(MI.getDesc().operands())) { + if (IsVMV0(MCOI)) { + MachineOperand &MO = MI.getOperand(OpNo); + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::COPY), RISCV::V0) + .addReg(MO.getReg()); + MO.setReg(RISCV::V0); + MadeChange = true; + break; + } + } + } + } + + // Now that any constraints requiring vmv0 are gone, eliminate any uses of + // vmv0 by recomputing the reg class. + // The only remaining uses should be around inline asm. + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + for (MachineOperand &MO : MI.uses()) { + if (MO.isReg() && MO.getReg().isVirtual() && + MRI.getRegClass(MO.getReg()) == &RISCV::VMV0RegClass) { + MRI.recomputeRegClass(MO.getReg()); + assert(MRI.getRegClass(MO.getReg()) != &RISCV::VMV0RegClass || + MI.isInlineAsm() || + MRI.getVRegDef(MO.getReg())->isInlineAsm()); + MadeChange = true; + } + } + } + } + + return MadeChange; +} diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index bb2d1717c3b1e..a4e7219c39f37 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -73,9 +73,7 @@ class RISCVVectorPeephole : public MachineFunctionPass { bool isAllOnesMask(const MachineInstr *MaskDef) const; std::optional getConstant(const MachineOperand &VL) const; bool ensureDominates(const MachineOperand &Use, MachineInstr &Src) const; - - /// Maps uses of V0 to the corresponding def of V0. 
- DenseMap V0Defs; + bool isKnownSameDefs(const MachineOperand &A, const MachineOperand &B) const; }; } // namespace @@ -268,14 +266,8 @@ bool RISCVVectorPeephole::convertToVLMAX(MachineInstr &MI) const { } bool RISCVVectorPeephole::isAllOnesMask(const MachineInstr *MaskDef) const { - assert(MaskDef && MaskDef->isCopy() && - MaskDef->getOperand(0).getReg() == RISCV::V0); - Register SrcReg = TRI->lookThruCopyLike(MaskDef->getOperand(1).getReg(), MRI); - if (!SrcReg.isVirtual()) - return false; - MaskDef = MRI->getVRegDef(SrcReg); - if (!MaskDef) - return false; + while (MaskDef->isCopy() && MaskDef->getOperand(1).getReg().isVirtual()) + MaskDef = MRI->getVRegDef(MaskDef->getOperand(1).getReg()); // TODO: Check that the VMSET is the expected bitwidth? The pseudo has // undefined behaviour if it's the wrong bitwidth, so we could choose to @@ -372,8 +364,7 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const { unsigned NewOpc = getVMV_V_VOpcodeForVMERGE_VVM(MI); if (!NewOpc) return false; - assert(MI.getOperand(4).isReg() && MI.getOperand(4).getReg() == RISCV::V0); - if (!isAllOnesMask(V0Defs.lookup(&MI))) + if (!isAllOnesMask(MRI->getVRegDef(MI.getOperand(4).getReg()))) return false; MI.setDesc(TII->get(NewOpc)); @@ -390,6 +381,15 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const { return true; } +bool RISCVVectorPeephole::isKnownSameDefs(const MachineOperand &A, + const MachineOperand &B) const { + if (A.getReg().isPhysical() || B.getReg().isPhysical()) + return false; + + return TRI->lookThruCopyLike(A.getReg(), MRI) == + TRI->lookThruCopyLike(B.getReg(), MRI); +} + /// If a PseudoVMERGE_VVM's true operand is a masked pseudo and both have the /// same mask, and the masked pseudo's passthru is the same as the false /// operand, we can convert the PseudoVMERGE_VVM to a PseudoVMV_V_V. 
@@ -404,14 +404,18 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) { if (!NewOpc) return false; MachineInstr *True = MRI->getVRegDef(MI.getOperand(3).getReg()); - if (!True || True->getParent() != MI.getParent() || - !RISCV::getMaskedPseudoInfo(True->getOpcode()) || !hasSameEEW(MI, *True)) + + if (!True || True->getParent() != MI.getParent()) return false; - const MachineInstr *TrueV0Def = V0Defs.lookup(True); - const MachineInstr *MIV0Def = V0Defs.lookup(&MI); - assert(TrueV0Def && TrueV0Def->isCopy() && MIV0Def && MIV0Def->isCopy()); - if (TrueV0Def->getOperand(1).getReg() != MIV0Def->getOperand(1).getReg()) + auto *TrueMaskedInfo = RISCV::getMaskedPseudoInfo(True->getOpcode()); + if (!TrueMaskedInfo || !hasSameEEW(MI, *True)) + return false; + + const MachineOperand &TrueMask = + True->getOperand(TrueMaskedInfo->MaskOpIdx + True->getNumExplicitDefs()); + const MachineOperand &MIMask = MI.getOperand(4); + if (!isKnownSameDefs(TrueMask, MIMask)) return false; // True's passthru needs to be equivalent to False @@ -450,7 +454,8 @@ bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const { if (!I) return false; - if (!isAllOnesMask(V0Defs.lookup(&MI))) + if (!isAllOnesMask(MRI->getVRegDef( + MI.getOperand(I->MaskOpIdx + MI.getNumExplicitDefs()).getReg()))) return false; // There are two classes of pseudos in the table - compares and @@ -575,7 +580,6 @@ bool RISCVVectorPeephole::foldUndefPassthruVMV_V_V(MachineInstr &MI) { MRI->replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(2).getReg()); MI.eraseFromParent(); - V0Defs.erase(&MI); return true; } @@ -645,7 +649,6 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) { MRI->replaceRegWith(MI.getOperand(0).getReg(), Src->getOperand(0).getReg()); MI.eraseFromParent(); - V0Defs.erase(&MI); return true; } @@ -665,24 +668,6 @@ bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - // Masked pseudos coming out of isel will have their mask 
operand in the form: - // - // $v0:vr = COPY %mask:vr - // %x:vr = Pseudo_MASK %a:vr, %b:br, $v0:vr - // - // Because $v0 isn't in SSA, keep track of its definition at each use so we - // can check mask operands. - for (const MachineBasicBlock &MBB : MF) { - const MachineInstr *CurrentV0Def = nullptr; - for (const MachineInstr &MI : MBB) { - if (MI.readsRegister(RISCV::V0, TRI)) - V0Defs[&MI] = CurrentV0Def; - - if (MI.definesRegister(RISCV::V0, TRI)) - CurrentV0Def = &MI; - } - } - for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : make_early_inc_range(MBB)) { Changed |= convertToVLMAX(MI); diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir index 42bf321228705..f8061462c6220 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir @@ -10,20 +10,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv1i8 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 3 /* e8 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: select_nxv1i8 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], 
[[DEF1]], $v0, -1, 3 /* e8 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb() = G_IMPLICIT_DEF @@ -41,20 +39,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv4i8 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 3 /* e8 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: select_nxv4i8 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 3 /* e8 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb() = G_IMPLICIT_DEF @@ -72,20 +68,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv16i8 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = 
PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 3 /* e8 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */ ; RV32I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]] ; RV32I-NEXT: PseudoRET implicit $v8m4 ; ; RV64I-LABEL: name: select_nxv16i8 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 3 /* e8 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */ ; RV64I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]] ; RV64I-NEXT: PseudoRET implicit $v8m4 %0:vrb() = G_IMPLICIT_DEF @@ -103,20 +97,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv64i8 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 4 /* e16 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: select_nxv64i8 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], 
[[DEF1]], $v0, -1, 4 /* e16 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb() = G_IMPLICIT_DEF @@ -134,20 +126,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv2i16 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 4 /* e16 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: select_nxv2i16 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 4 /* e16 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb() = G_IMPLICIT_DEF @@ -165,20 +155,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv8i16 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = 
PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 4 /* e16 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */ ; RV32I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]] ; RV32I-NEXT: PseudoRET implicit $v8m4 ; ; RV64I-LABEL: name: select_nxv8i16 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 4 /* e16 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */ ; RV64I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]] ; RV64I-NEXT: PseudoRET implicit $v8m4 %0:vrb() = G_IMPLICIT_DEF @@ -196,20 +184,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv32i16 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_MF2_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF2 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 5 /* e32 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_MF2_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF2_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: select_nxv32i16 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_MF2_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF2 [[DEF2]], 
[[DEF1]], [[DEF1]], $v0, -1, 5 /* e32 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_MF2_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF2_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb() = G_IMPLICIT_DEF @@ -227,20 +213,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv2i32 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 5 /* e32 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */ ; RV32I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]] ; RV32I-NEXT: PseudoRET implicit $v8m2 ; ; RV64I-LABEL: name: select_nxv2i32 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 5 /* e32 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */ ; RV64I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]] ; RV64I-NEXT: PseudoRET implicit $v8m2 %0:vrb() = G_IMPLICIT_DEF @@ -258,20 +242,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv8i32 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: 
[[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 5 /* e32 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */ ; RV32I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]] ; RV32I-NEXT: PseudoRET implicit $v8m8 ; ; RV64I-LABEL: name: select_nxv8i32 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 5 /* e32 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */ ; RV64I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]] ; RV64I-NEXT: PseudoRET implicit $v8m8 %0:vrb() = G_IMPLICIT_DEF @@ -289,20 +271,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv1i64 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 6 /* e64 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */ ; RV32I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]] ; RV32I-NEXT: PseudoRET implicit $v8m2 ; ; RV64I-LABEL: name: select_nxv1i64 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: 
[[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 6 /* e64 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */ ; RV64I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]] ; RV64I-NEXT: PseudoRET implicit $v8m2 %0:vrb() = G_IMPLICIT_DEF @@ -320,20 +300,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv4i64 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 6 /* e64 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */ ; RV32I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]] ; RV32I-NEXT: PseudoRET implicit $v8m8 ; ; RV64I-LABEL: name: select_nxv4i64 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 6 /* e64 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */ ; RV64I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]] ; RV64I-NEXT: PseudoRET implicit $v8m8 %0:vrb() = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll index f60def9d546f8..f93cb65897210 100644 --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -43,6 +43,7 @@ ; CHECK-NEXT: RISC-V Insert Read/Write CSR 
Pass ; CHECK-NEXT: RISC-V Insert Write VXRM Pass ; CHECK-NEXT: RISC-V Landing Pad Setup +; CHECK-NEXT: RISC-V VMV0 Elimination ; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Eliminate PHI nodes for register allocation ; CHECK-NEXT: Two-Address instruction pass diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 668c734612447..a2b5e9c86a107 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -127,6 +127,7 @@ ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass ; CHECK-NEXT: RISC-V Insert Write VXRM Pass ; CHECK-NEXT: RISC-V Landing Pad Setup +; CHECK-NEXT: RISC-V VMV0 Elimination ; CHECK-NEXT: Detect Dead Lanes ; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Process Implicit Definitions diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll index 1b9c78a20ec3b..039266b169ab2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll @@ -1515,40 +1515,36 @@ define @vp_ceil_vv_nxv16f64( %va, < ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 3 ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, 
.LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/commutable.ll b/llvm/test/CodeGen/RISCV/rvv/commutable.ll index e26c467f025bd..5f35626120178 100644 --- a/llvm/test/CodeGen/RISCV/rvv/commutable.ll +++ b/llvm/test/CodeGen/RISCV/rvv/commutable.ll @@ -26,10 +26,9 @@ define @commutable_vadd_vv_masked( %0, @llvm.riscv.vadd.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vadd.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -59,10 +58,9 @@ define @commutable_vand_vv_masked( %0, @llvm.riscv.vand.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vand.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -92,10 +90,9 @@ define @commutable_vor_vv_masked( %0, @llvm.riscv.vor.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vor.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -125,10 +122,9 @@ define @commutable_vxor_vv_masked( %0, @llvm.riscv.vxor.mask.nxv1i64.nxv1i64( undef, 
%0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vxor.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -158,10 +154,9 @@ define @commutable_vmseq_vv_masked( %0, @llvm.riscv.vmseq.mask.nxv1i64( undef, %0, %1, %mask, iXLen %2) %b = call @llvm.riscv.vmseq.mask.nxv1i64( undef, %1, %0, %mask, iXLen %2) @@ -191,10 +186,9 @@ define @commutable_vmsne_vv_masked( %0, @llvm.riscv.vmsne.mask.nxv1i64( undef, %0, %1, %mask, iXLen %2) %b = call @llvm.riscv.vmsne.mask.nxv1i64( undef, %1, %0, %mask, iXLen %2) @@ -224,10 +218,9 @@ define @commutable_vmin_vv_masked( %0, @llvm.riscv.vmin.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmin.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -257,10 +250,9 @@ define @commutable_vminu_vv_masked( %0, @llvm.riscv.vminu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vminu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -290,10 +282,9 @@ define @commutable_vmax_vv_masked( %0, @llvm.riscv.vmax.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmax.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -323,10 +314,9 @@ define @commutable_vmaxu_vv_masked( %0, @llvm.riscv.vmaxu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmaxu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -356,10 +346,9 @@ define @commutable_vmul_vv_masked( %0, @llvm.riscv.vmul.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmul.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -389,10 +378,9 @@ define @commutable_vmulh_vv_masked( %0, @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -422,10 +410,9 @@ define @commutable_vmulhu_vv_masked( %0, 
@llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -456,9 +443,8 @@ define @commutable_vwadd_vv_masked( %0, @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.nxv1i32( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.nxv1i32( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -489,9 +475,8 @@ define @commutable_vwaddu_vv_masked( %0, @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.nxv1i32( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.nxv1i32( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -522,9 +507,8 @@ define @commutable_vwmul_vv_masked( %0, @llvm.riscv.vwmul.mask.nxv1i64.nxv1i32.nxv1i32( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vwmul.mask.nxv1i64.nxv1i32.nxv1i32( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -555,9 +539,8 @@ define @commutable_vwmulu_vv_masked( %0, @llvm.riscv.vwmulu.mask.nxv1i64.nxv1i32.nxv1i32( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vwmulu.mask.nxv1i64.nxv1i32.nxv1i32( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -637,10 +620,9 @@ define @commutable_vadc_vv( %0, @llvm.riscv.vadc.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2) @@ -671,10 +653,9 @@ define @commutable_vsadd_vv_masked( %0, @llvm.riscv.vsadd.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vsadd.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -704,10 +685,9 @@ define @commutable_vsaddu_vv_masked( %0, @llvm.riscv.vsaddu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vsaddu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -739,10 +719,9 @@ define @commutable_vaadd_vv_masked( %0, @llvm.riscv.vaadd.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen 0, iXLen %2, iXLen 1) %b = call @llvm.riscv.vaadd.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen 0, iXLen 
%2, iXLen 1) @@ -774,10 +753,9 @@ define @commutable_vaaddu_vv_masked( %0, @llvm.riscv.vaaddu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen 0, iXLen %2, iXLen 1) %b = call @llvm.riscv.vaaddu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen 0, iXLen %2, iXLen 1) @@ -809,10 +787,9 @@ define @commutable_vsmul_vv_masked( %0, @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen 0, iXLen %2, iXLen 1) %b = call @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen 0, iXLen %2, iXLen 1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index 585a331e55094..d33049bac14db 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -3906,10 +3906,9 @@ define void @trunc_v6bf16(ptr %x) { ; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -3971,10 +3970,8 @@ define void @trunc_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI172_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI172_0)(a1) -; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 -; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -3990,10 +3987,9 @@ define void @trunc_v6f16(ptr %x) { ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; 
ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -4086,11 +4082,10 @@ define void @ceil_v6bf16(ptr %x) { ; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a1, 3 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4157,11 +4152,9 @@ define void @ceil_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI178_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI178_0)(a1) -; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 3 -; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4178,11 +4171,10 @@ define void @ceil_v6f16(ptr %x) { ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a1, 3 -; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4280,11 +4272,10 @@ define void @floor_v6bf16(ptr %x) { ; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: 
vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a1, 2 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4351,11 +4342,9 @@ define void @floor_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI184_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI184_0)(a1) -; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 2 -; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4372,11 +4361,10 @@ define void @floor_v6f16(ptr %x) { ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a1, 2 -; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4474,11 +4462,10 @@ define void @round_v6bf16(ptr %x) { ; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a1, 4 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4545,11 +4532,9 @@ define void @round_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI190_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI190_0)(a1) -; 
ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 4 -; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4566,11 +4551,10 @@ define void @round_v6f16(ptr %x) { ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a1, 4 -; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll index 80a9143d1ad8b..fe65da0d330f1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll @@ -569,59 +569,69 @@ declare <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v32f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: vmv1r.v v25, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: 
vslidedown.vi v7, v0, 2 +; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: lui a2, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: lui a1, %hi(.LCPI26_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t ; CHECK-NEXT: frflags a1 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: sltu a0, a0, a2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; 
CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vfabs.v v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t ; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll index a91dee1cb245f..037ed257f4a89 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare <2 x i7> 
@llvm.vp.trunc.v2i7.v2i16(<2 x i16>, <2 x i1>, i32) @@ -222,316 +222,645 @@ define <2 x i32> @vtrunc_v2i32_v2i64_unmasked(<2 x i64> %a, i32 zeroext %vl) { declare <128 x i32> @llvm.vp.trunc.v128i32.v128i64(<128 x i64>, <128 x i1>, i32) define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 zeroext %vl) { -; CHECK-LABEL: vtrunc_v128i32_v128i64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 72 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 72 * vlenb -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 5 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vi v6, v0, 8 -; CHECK-NEXT: addi a2, a1, 512 -; CHECK-NEXT: addi a3, a1, 640 -; CHECK-NEXT: addi a4, a7, -64 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v27, v6, 4 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a3) -; CHECK-NEXT: sltu a3, a7, a4 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v27, 2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a4, a3, a4 -; CHECK-NEXT: addi a3, a4, -32 -; CHECK-NEXT: sltu a5, a4, a3 -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: and a3, a5, a3 -; CHECK-NEXT: addi a5, a3, -16 -; CHECK-NEXT: sltu a6, a3, a5 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v16, v8, 
0, v0.t -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 4 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: addi a5, a1, 128 -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v26, v7, 4 -; CHECK-NEXT: bltu a3, a2, .LBB16_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a3, 16 -; CHECK-NEXT: .LBB16_2: -; CHECK-NEXT: vmv1r.v v0, v27 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a5) -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 56 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v27, v26, 2 -; CHECK-NEXT: li a5, 64 -; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 6 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: mv a6, a7 -; CHECK-NEXT: bltu a7, a5, .LBB16_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a6, 64 -; CHECK-NEXT: .LBB16_4: -; CHECK-NEXT: vmv1r.v v0, v27 -; CHECK-NEXT: addi a5, a1, 384 -; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: csrr t0, vlenb -; CHECK-NEXT: li t1, 48 -; CHECK-NEXT: mul t0, t0, t1 -; CHECK-NEXT: add t0, sp, t0 -; CHECK-NEXT: addi t0, t0, 16 -; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill -; CHECK-NEXT: addi t0, a6, -32 -; CHECK-NEXT: sltu a6, a6, t0 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a6, a6, t0 -; CHECK-NEXT: addi t0, a6, -16 -; CHECK-NEXT: sltu t1, a6, t0 -; CHECK-NEXT: addi t1, t1, -1 -; CHECK-NEXT: and t0, t1, t0 -; 
CHECK-NEXT: csrr t1, vlenb -; CHECK-NEXT: li t2, 56 -; CHECK-NEXT: mul t1, t1, t2 -; CHECK-NEXT: add t1, sp, t1 -; CHECK-NEXT: addi t1, t1, 16 -; CHECK-NEXT: vl8r.v v16, (t1) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, t0, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: csrr t0, vlenb -; CHECK-NEXT: slli t0, t0, 3 -; CHECK-NEXT: add t0, sp, t0 -; CHECK-NEXT: addi t0, t0, 16 -; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a6, a2, .LBB16_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: li a6, 16 -; CHECK-NEXT: .LBB16_6: -; CHECK-NEXT: vmv1r.v v0, v26 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a5) -; CHECK-NEXT: addi a5, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a1, 256 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v26, v6, 2 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li t0, 48 -; CHECK-NEXT: mul a5, a5, t0 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 56 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: mv a5, a4 -; CHECK-NEXT: bltu a4, a3, .LBB16_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: li a5, 32 -; CHECK-NEXT: .LBB16_8: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: addi a1, a5, -16 -; CHECK-NEXT: sltu a5, a5, a1 -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: and a1, a5, a1 -; CHECK-NEXT: vmv1r.v v0, v26 -; CHECK-NEXT: addi a5, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t -; CHECK-NEXT: csrr a1, vlenb -; 
CHECK-NEXT: li a5, 40 -; CHECK-NEXT: mul a1, a1, a5 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a4, a2, .LBB16_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: li a4, 16 -; CHECK-NEXT: .LBB16_10: -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v25, v7, 2 -; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 48 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: mv a1, a7 -; CHECK-NEXT: bltu a7, a3, .LBB16_12 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: .LBB16_12: -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 4 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v24, v16 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 3 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: li a5, 40 -; CHECK-NEXT: mul a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: li a5, 40 -; CHECK-NEXT: mul a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 6 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vi v16, v24, 16 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli 
a4, a4, 6 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: addi a4, a1, -16 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 56 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v16, v8, 16 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 56 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 48 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 40 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v8, v16, 16 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 48 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: sltu a1, a1, a4 -; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: and a1, a1, a4 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: li a5, 24 -; CHECK-NEXT: mul a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: bltu a7, a2, .LBB16_14 -; CHECK-NEXT: # %bb.13: -; CHECK-NEXT: li a7, 16 -; CHECK-NEXT: .LBB16_14: -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; 
CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vi v24, v8, 16 -; CHECK-NEXT: vse32.v v24, (a0) -; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 48 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vse32.v v8, (a1) -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 56 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vse32.v v8, (a1) -; CHECK-NEXT: addi a0, a0, 384 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 6 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 72 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: ret +; RV32-LABEL: vtrunc_v128i32_v128i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw s0, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset s0, -4 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 72 * vlenb +; RV32-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; RV32-NEXT: vmv1r.v v7, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 40 
+; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v5, v0, 8 +; RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV32-NEXT: vslidedown.vi v4, v0, 4 +; RV32-NEXT: addi a2, a7, -64 +; RV32-NEXT: vslidedown.vi v3, v5, 4 +; RV32-NEXT: sltu a3, a7, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a4, a3, a2 +; RV32-NEXT: addi a2, a4, -32 +; RV32-NEXT: sltu a3, a4, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a3, a3, a2 +; RV32-NEXT: li a2, 16 +; RV32-NEXT: addi t0, a3, -16 +; RV32-NEXT: mv a5, a3 +; RV32-NEXT: bltu a3, a2, .LBB16_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a5, 16 +; RV32-NEXT: .LBB16_2: +; RV32-NEXT: li t2, 64 +; RV32-NEXT: sltu t1, a3, t0 +; RV32-NEXT: mv a6, a7 +; RV32-NEXT: bltu a7, t2, .LBB16_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: li a6, 64 +; RV32-NEXT: .LBB16_4: +; RV32-NEXT: addi t3, a1, 128 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v6, v4, 2 +; RV32-NEXT: addi s0, a1, 512 +; RV32-NEXT: addi t6, a1, 640 +; RV32-NEXT: vslidedown.vi v0, v3, 2 +; RV32-NEXT: addi t1, t1, -1 +; RV32-NEXT: addi t2, a1, 384 +; RV32-NEXT: vslidedown.vi v2, v5, 2 +; RV32-NEXT: li a3, 32 +; RV32-NEXT: addi t4, a6, -32 +; RV32-NEXT: sltu a6, a6, t4 +; RV32-NEXT: addi a6, a6, -1 +; RV32-NEXT: and a6, a6, t4 +; RV32-NEXT: addi t4, a6, -16 +; RV32-NEXT: sltu t5, a6, t4 +; RV32-NEXT: addi t5, t5, -1 +; RV32-NEXT: bltu a6, a2, .LBB16_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: li a6, 16 +; RV32-NEXT: .LBB16_6: +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v8, (s0) +; RV32-NEXT: csrr s0, vlenb +; RV32-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: li a0, 56 +; RV32-NEXT: mul s0, s0, a0 +; RV32-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: add s0, sp, s0 +; RV32-NEXT: addi s0, s0, 16 +; RV32-NEXT: vs8r.v v8, (s0) # Unknown-size Folded Spill +; RV32-NEXT: vle64.v v16, (t6) +; 
RV32-NEXT: vle64.v v8, (t3) +; RV32-NEXT: csrr t3, vlenb +; RV32-NEXT: slli t3, t3, 3 +; RV32-NEXT: add t3, sp, t3 +; RV32-NEXT: addi t3, t3, 16 +; RV32-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill +; RV32-NEXT: vle64.v v8, (a1) +; RV32-NEXT: csrr t3, vlenb +; RV32-NEXT: li t6, 48 +; RV32-NEXT: mul t3, t3, t6 +; RV32-NEXT: add t3, sp, t3 +; RV32-NEXT: addi t3, t3, 16 +; RV32-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill +; RV32-NEXT: vle64.v v8, (t2) +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 4 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 16 +; RV32-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill +; RV32-NEXT: and t2, t1, t0 +; RV32-NEXT: and t1, t5, t4 +; RV32-NEXT: addi a1, a1, 256 +; RV32-NEXT: mv t0, a4 +; RV32-NEXT: bltu a4, a3, .LBB16_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: li t0, 32 +; RV32-NEXT: .LBB16_8: +; RV32-NEXT: vsetvli zero, t2, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v16, 0, v0.t +; RV32-NEXT: addi t2, sp, 16 +; RV32-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v3 +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: li t3, 56 +; RV32-NEXT: mul t2, t2, t3 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 16 +; RV32-NEXT: vl8r.v v24, (t2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 3 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, t1, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li t1, 24 +; RV32-NEXT: mul a5, a5, t1 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs8r.v v16, (a5) # 
Unknown-size Folded Spill +; RV32-NEXT: addi a5, t0, -16 +; RV32-NEXT: sltu t0, t0, a5 +; RV32-NEXT: addi t0, t0, -1 +; RV32-NEXT: and a5, t0, a5 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v6, v7, 2 +; RV32-NEXT: vmv1r.v v0, v4 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li t0, 48 +; RV32-NEXT: mul a1, a1, t0 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a6, 56 +; RV32-NEXT: mul a1, a1, a6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v24, v16, 0, v0.t +; RV32-NEXT: bltu a4, a2, .LBB16_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: li a4, 16 +; RV32-NEXT: .LBB16_10: +; RV32-NEXT: vmv1r.v v0, v5 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a4, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v16, v8, 0, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: mv a1, a7 +; RV32-NEXT: bltu a7, a3, .LBB16_12 +; RV32-NEXT: # %bb.11: +; 
RV32-NEXT: li a1, 32 +; RV32-NEXT: .LBB16_12: +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vslideup.vi v24, v8, 16 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: addi a4, a1, -16 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 56 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v8, v16, 16 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 56 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 48 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 24 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v8, v16, 16 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 48 +; RV32-NEXT: mul a5, a5, a6 +; 
RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; RV32-NEXT: sltu a1, a1, a4 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a4 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v16, 0, v0.t +; RV32-NEXT: bltu a7, a2, .LBB16_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: li a7, 16 +; RV32-NEXT: .LBB16_14: +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a7, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vslideup.vi v16, v8, 16 +; RV32-NEXT: vse32.v v16, (a0) +; RV32-NEXT: addi a1, a0, 256 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: addi a1, a0, 128 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: addi a0, a0, 384 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vse32.v v8, (a0) +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 72 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa sp, 32 +; RV32-NEXT: lw s0, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore 
s0 +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: vtrunc_v128i32_v128i64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -48 +; RV64-NEXT: .cfi_def_cfa_offset 48 +; RV64-NEXT: sd s0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset s0, -8 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 72 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 72 * vlenb +; RV64-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; RV64-NEXT: vmv1r.v v7, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 32 +; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 40 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 32 +; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vslidedown.vi v5, v0, 8 +; RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV64-NEXT: vslidedown.vi v4, v0, 4 +; RV64-NEXT: addi a2, a7, -64 +; RV64-NEXT: vslidedown.vi v3, v5, 4 +; RV64-NEXT: sltu a3, a7, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a4, a3, a2 +; RV64-NEXT: addi a2, a4, -32 +; RV64-NEXT: sltu a3, a4, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a3, a3, a2 +; RV64-NEXT: li a2, 16 +; RV64-NEXT: addi t0, a3, -16 +; RV64-NEXT: mv a5, a3 +; RV64-NEXT: bltu a3, a2, .LBB16_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a5, 16 +; RV64-NEXT: .LBB16_2: +; RV64-NEXT: li t2, 64 +; RV64-NEXT: sltu t1, a3, t0 +; RV64-NEXT: mv a6, a7 +; RV64-NEXT: bltu a7, t2, .LBB16_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: li a6, 64 +; RV64-NEXT: .LBB16_4: +; RV64-NEXT: addi t3, a1, 128 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v6, v4, 2 +; RV64-NEXT: addi s0, a1, 512 +; RV64-NEXT: addi t6, a1, 640 +; RV64-NEXT: vslidedown.vi v0, v3, 2 +; 
RV64-NEXT: addi t1, t1, -1 +; RV64-NEXT: addi t2, a1, 384 +; RV64-NEXT: vslidedown.vi v2, v5, 2 +; RV64-NEXT: li a3, 32 +; RV64-NEXT: addi t4, a6, -32 +; RV64-NEXT: sltu a6, a6, t4 +; RV64-NEXT: addi a6, a6, -1 +; RV64-NEXT: and a6, a6, t4 +; RV64-NEXT: addi t4, a6, -16 +; RV64-NEXT: sltu t5, a6, t4 +; RV64-NEXT: addi t5, t5, -1 +; RV64-NEXT: bltu a6, a2, .LBB16_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: li a6, 16 +; RV64-NEXT: .LBB16_6: +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v8, (s0) +; RV64-NEXT: csrr s0, vlenb +; RV64-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a0, 56 +; RV64-NEXT: mul s0, s0, a0 +; RV64-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: add s0, sp, s0 +; RV64-NEXT: addi s0, s0, 32 +; RV64-NEXT: vs8r.v v8, (s0) # Unknown-size Folded Spill +; RV64-NEXT: vle64.v v16, (t6) +; RV64-NEXT: vle64.v v8, (t3) +; RV64-NEXT: csrr t3, vlenb +; RV64-NEXT: slli t3, t3, 3 +; RV64-NEXT: add t3, sp, t3 +; RV64-NEXT: addi t3, t3, 32 +; RV64-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill +; RV64-NEXT: vle64.v v8, (a1) +; RV64-NEXT: csrr t3, vlenb +; RV64-NEXT: li t6, 48 +; RV64-NEXT: mul t3, t3, t6 +; RV64-NEXT: add t3, sp, t3 +; RV64-NEXT: addi t3, t3, 32 +; RV64-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill +; RV64-NEXT: vle64.v v8, (t2) +; RV64-NEXT: csrr t2, vlenb +; RV64-NEXT: slli t2, t2, 4 +; RV64-NEXT: add t2, sp, t2 +; RV64-NEXT: addi t2, t2, 32 +; RV64-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill +; RV64-NEXT: and t2, t1, t0 +; RV64-NEXT: and t1, t5, t4 +; RV64-NEXT: addi a1, a1, 256 +; RV64-NEXT: mv t0, a4 +; RV64-NEXT: bltu a4, a3, .LBB16_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: li t0, 32 +; RV64-NEXT: .LBB16_8: +; RV64-NEXT: vsetvli zero, t2, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v16, 0, v0.t +; RV64-NEXT: addi t2, sp, 32 +; RV64-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v3 +; RV64-NEXT: csrr t2, vlenb +; RV64-NEXT: li t3, 56 +; RV64-NEXT: mul t2, t2, t3 +; 
RV64-NEXT: add t2, sp, t2 +; RV64-NEXT: addi t2, t2, 32 +; RV64-NEXT: vl8r.v v24, (t2) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, t1, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li t1, 24 +; RV64-NEXT: mul a5, a5, t1 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV64-NEXT: addi a5, t0, -16 +; RV64-NEXT: sltu t0, t0, a5 +; RV64-NEXT: addi t0, t0, -1 +; RV64-NEXT: and a5, t0, a5 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v6, v7, 2 +; RV64-NEXT: vmv1r.v v0, v4 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li t0, 48 +; RV64-NEXT: mul a1, a1, t0 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a6, 56 +; RV64-NEXT: mul a1, a1, a6 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v2 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, 
a1, 32 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v24, v16, 0, v0.t +; RV64-NEXT: bltu a4, a2, .LBB16_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: li a4, 16 +; RV64-NEXT: .LBB16_10: +; RV64-NEXT: vmv1r.v v0, v5 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a4, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v16, v8, 0, v0.t +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a4, 48 +; RV64-NEXT: mul a1, a1, a4 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: mv a1, a7 +; RV64-NEXT: bltu a7, a3, .LBB16_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: li a1, 32 +; RV64-NEXT: .LBB16_12: +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: addi a4, sp, 32 +; RV64-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: li a5, 24 +; RV64-NEXT: mul a4, a4, a5 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 32 +; RV64-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: li a5, 24 +; RV64-NEXT: mul a4, a4, a5 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 32 +; RV64-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: slli a4, a4, 6 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 32 +; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV64-NEXT: vslideup.vi v24, v8, 16 +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: slli a4, a4, 6 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 32 +; RV64-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV64-NEXT: addi a4, a1, -16 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li a6, 56 +; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; 
RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; RV64-NEXT: vslideup.vi v8, v16, 16 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li a6, 56 +; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li a6, 48 +; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li a6, 24 +; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; RV64-NEXT: vslideup.vi v8, v16, 16 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li a6, 48 +; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; RV64-NEXT: sltu a1, a1, a4 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a4 +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: slli a4, a4, 5 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 32 +; RV64-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v16, 0, v0.t +; RV64-NEXT: bltu a7, a2, .LBB16_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: li a7, 16 +; RV64-NEXT: .LBB16_14: +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 40 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a7, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV64-NEXT: vslideup.vi v16, v8, 16 +; RV64-NEXT: vse32.v v16, (a0) +; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 48 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, 
sp, a2 +; RV64-NEXT: addi a2, a2, 32 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vse32.v v8, (a1) +; RV64-NEXT: addi a1, a0, 128 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 56 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 32 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vse32.v v8, (a1) +; RV64-NEXT: addi a0, a0, 384 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vse32.v v8, (a0) +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 72 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 48 +; RV64-NEXT: ld s0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: addi sp, sp, 48 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret %v = call <128 x i32> @llvm.vp.trunc.v128i32.v128i64(<128 x i64> %a, <128 x i1> %m, i32 %vl) ret <128 x i32> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll index 6c9989775f790..8e2e8f3fb0dec 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -402,29 +402,29 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: addi a4, a3, -16 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-NEXT: sltu a3, a3, a4 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a3, a3, a4 +; CHECK-NEXT: addi a5, a3, -16 ; CHECK-NEXT: addi a4, a1, 128 -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a4), v0.t -; CHECK-NEXT: addi a3, a2, -32 -; CHECK-NEXT: sltu a4, a2, a3 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a4, a4, a3 +; 
CHECK-NEXT: addi a7, a2, -32 +; CHECK-NEXT: sltu a3, a3, a5 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a6, a3, a5 +; CHECK-NEXT: sltu a3, a2, a7 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a5, a3, a7 ; CHECK-NEXT: li a3, 16 -; CHECK-NEXT: bltu a4, a3, .LBB32_4 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v8, 2 +; CHECK-NEXT: bltu a5, a3, .LBB32_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a4, 16 +; CHECK-NEXT: li a5, 16 ; CHECK-NEXT: .LBB32_4: +; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v16, (a4), v0.t ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-NEXT: addi a5, a1, 256 -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a5), v0.t +; CHECK-NEXT: addi a4, a1, 256 +; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a4), v0.t ; CHECK-NEXT: bltu a2, a3, .LBB32_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: li a2, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll index 557882ee31d4c..c00723cf60e57 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll @@ -94,9 +94,10 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a4 ; RV32-NEXT: vslide1down.vx v10, v10, a2 ; RV32-NEXT: vslidedown.vi v10, v10, 2 +; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vmerge.vxm v8, v8, a0, v0 ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: ret @@ -124,9 +125,10 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a4 ; RV64-NEXT: vslide1down.vx v10, v10, a2 ; RV64-NEXT: vslidedown.vi v10, v10, 2 +; RV64-NEXT: 
vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 ; RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: ret @@ -163,9 +165,10 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslidedown.vi v10, v10, 2 +; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vmerge.vim v8, v8, -1, v0 ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: ret @@ -193,9 +196,10 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a3 ; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslidedown.vi v10, v10, 2 +; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vmerge.vim v8, v8, -1, v0 ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: ret @@ -299,9 +303,10 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslidedown.vi v10, v10, 2 +; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: ret @@ -329,9 +334,10 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a3 ; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslidedown.vi v10, v10, 2 +; 
RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: ret @@ -368,9 +374,10 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslidedown.vi v10, v10, 2 +; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vmerge.vim v8, v8, 0, v0 ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: ret @@ -398,9 +405,10 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a3 ; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslidedown.vi v10, v10, 2 +; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vmerge.vim v8, v8, 0, v0 ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll index f9b5095c9af1d..9b5bde2814fda 100644 --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -1515,40 +1515,36 @@ define @vp_floor_nxv16f64( %va, @vfmax_vv_nxv16f64( %va, @vfmin_vv_nxv16f64( %va, @nearbyint_nxv16bf16( %x) { define @nearbyint_nxv32bf16( %x) { ; CHECK-LABEL: nearbyint_nxv32bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 
0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfabs.v v8, v16 -; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: vfabs.v v8, v24 -; CHECK-NEXT: vmflt.vf v7, v8, fa5 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 ; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %a = call 
@llvm.nearbyint.nxv32bf16( %x) ret %a @@ -392,50 +377,35 @@ define @nearbyint_nxv32f16( %x) { ; ; ZVFHMIN-LABEL: nearbyint_nxv32f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: sub sp, sp, a0 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v24, v16 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 ; ZVFHMIN-NEXT: frflags a0 +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: vfabs.v v8, v24 -; ZVFHMIN-NEXT: vmflt.vf v7, v8, fa5 -; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t -; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsflags a0 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t ; ZVFHMIN-NEXT: frflags a0 -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t ; ZVFHMIN-NEXT: 
vfcvt.f.x.v v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 ; ZVFHMIN-NEXT: fsflags a0 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 -; ZVFHMIN-NEXT: addi sp, sp, 16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 ; ZVFHMIN-NEXT: ret %a = call @llvm.nearbyint.nxv32f16( %x) ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll index b569efc7447da..cb7961cb9bd8a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll @@ -1186,21 +1186,21 @@ define @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @vpload_nxv8i64(ptr %ptr, %m, i32 ze ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: $v0 = COPY [[COPY1]] - ; CHECK-NEXT: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK $noreg, [[COPY2]], $v0, [[COPY]], 6 /* e64 */, 1 /* ta, mu */ :: (load unknown-size from %ir.ptr, align 64) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vmv0 = COPY [[COPY1]] + ; CHECK-NEXT: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK $noreg, [[COPY2]], [[COPY3]], [[COPY]], 6 /* e64 */, 1 /* ta, mu */ :: (load unknown-size from %ir.ptr, align 64) ; CHECK-NEXT: $v8m8 = COPY [[PseudoVLE64_V_M8_MASK]] ; CHECK-NEXT: PseudoRET implicit $v8m8 %load = call @llvm.vp.load.nxv8i64.p0(ptr %ptr, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir b/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir index 6fe228f44a1c8..2d49b4e4f493f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir +++ b/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir @@ -17,9 +17,9 @@ body: | ; 
CHECK: liveins: $v0, $v1, $v2, $v3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 1, 192 /* e8, m1, ta, ma */, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: renamable $v8 = PseudoVMERGE_VIM_M1 undef renamable $v8, killed renamable $v2, 1, killed renamable $v0, 1, 3 /* e8 */, implicit $vl, implicit $vtype - ; CHECK-NEXT: renamable $v0 = COPY killed renamable $v1, implicit $vtype - ; CHECK-NEXT: renamable $v9 = PseudoVMERGE_VIM_M1 undef renamable $v9, killed renamable $v3, 1, killed renamable $v0, 1, 3 /* e8 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: renamable $v8 = PseudoVMERGE_VIM_M1 undef renamable $v8, killed renamable $v2, 1, $v0, 1, 3 /* e8 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: $v0 = COPY killed renamable $v1, implicit $vtype + ; CHECK-NEXT: renamable $v9 = PseudoVMERGE_VIM_M1 undef renamable $v9, killed renamable $v3, 1, $v0, 1, 3 /* e8 */, implicit $vl, implicit $vtype ; CHECK-NEXT: renamable $v0 = PseudoVADD_VV_M1 undef renamable $v0, killed renamable $v8, killed renamable $v9, 1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: PseudoRET implicit $v0 %0:vr = COPY $v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll index 7d3700492ea7b..a325829d472db 100644 --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -1495,47 +1495,73 @@ declare @llvm.vp.nearbyint.nxv16f64( @vp_nearbyint_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi 
a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI44_0) ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: vslidedown.vx v25, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vfabs.v v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t ; CHECK-NEXT: frflags a2 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: fsflags a2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli 
zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa sp, 16 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv16f64( %va, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll index 8457f3d2c149c..c6662e092aa5a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll @@ -14,8 +14,8 @@ define @foo( %x, @llvm.vp.fmul.nxv1f64( %x, %y, %m, i32 %vl) diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll index 39744dcecd718..bc4b3ad7f79f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll @@ -1515,40 +1515,36 @@ define @vp_round_nxv16f64( %va, @vp_roundeven_nxv16f64( %va, ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 0 ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v 
v24, v16, v0.t -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll index 1300d8cd64ebb..75615fe0fe759 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -1515,40 +1515,36 @@ define @vp_roundtozero_nxv16f64( %v ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; 
CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 1 ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir index 936fa21763eba..a050034c63168 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir @@ -58,15 +58,13 @@ body: | ; CHECK-NEXT: %true:vr = 
COPY $v9 ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 ; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 0 /* e8 */ - ; CHECK-NEXT: $v0 = COPY %mask ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, %avl, 5 /* e32 */, 0 /* tu, mu */ %false:vr = COPY $v8 %pt:vrnov0 = COPY $v8 %true:vr = COPY $v9 %avl:gprnox0 = COPY $x1 %mask:vmv0 = PseudoVMSET_M_B8 %avl, 0 - $v0 = COPY %mask - %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5 + %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, %avl, 5 ... --- name: same_mask @@ -78,18 +76,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %pt:vr = COPY $v8 ; CHECK-NEXT: %false:vrnov0 = COPY $v9 - ; CHECK-NEXT: %mask:vr = COPY $v0 - ; CHECK-NEXT: $v0 = COPY %mask - ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */ - ; CHECK-NEXT: $v0 = COPY %mask + ; CHECK-NEXT: %mask:vmv0 = COPY $v0 + ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 8, 5 /* e32 */, 0 /* tu, mu */ %pt:vrnov0 = COPY $v8 %false:vrnov0 = COPY $v9 - %mask:vr = COPY $v0 - $v0 = COPY %mask - %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */ - $v0 = COPY %mask - %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */ + %mask:vmv0 = COPY $v0 + %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */ + %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 8, 5 /* e32 */ ... 
--- # Shouldn't be converted because false operands are different @@ -102,18 +96,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %pt:vrnov0 = COPY $v8 ; CHECK-NEXT: %false:vrnov0 = COPY $v9 - ; CHECK-NEXT: %mask:vr = COPY $v0 - ; CHECK-NEXT: $v0 = COPY %mask - ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %pt, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */ - ; CHECK-NEXT: $v0 = COPY %mask - ; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */ + ; CHECK-NEXT: %mask:vmv0 = COPY $v0 + ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %pt, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 8, 5 /* e32 */ %pt:vrnov0 = COPY $v8 %false:vrnov0 = COPY $v9 - %mask:vr = COPY $v0 - $v0 = COPY %mask - %true:vrnov0 = PseudoVADD_VV_M1_MASK %pt, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */ - $v0 = COPY %mask - %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */ + %mask:vmv0 = COPY $v0 + %true:vrnov0 = PseudoVADD_VV_M1_MASK %pt, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */ + %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 8, 5 /* e32 */ ... 
--- # Shouldn't be converted because EEWs are different @@ -126,18 +116,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %pt:vrnov0 = COPY $v8 ; CHECK-NEXT: %false:vrnov0 = COPY $v9 - ; CHECK-NEXT: %mask:vr = COPY $v0 - ; CHECK-NEXT: $v0 = COPY %mask - ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 4 /* e16 */, 0 /* tu, mu */ - ; CHECK-NEXT: $v0 = COPY %mask - ; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */ + ; CHECK-NEXT: %mask:vmv0 = COPY $v0 + ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 4 /* e16 */, 0 /* tu, mu */ + ; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 8, 5 /* e32 */ %pt:vrnov0 = COPY $v8 %false:vrnov0 = COPY $v9 - %mask:vr = COPY $v0 - $v0 = COPY %mask - %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 4 /* e16 */, 0 /* tu, mu */ - $v0 = COPY %mask - %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */ + %mask:vmv0 = COPY $v0 + %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 4 /* e16 */, 0 /* tu, mu */ + %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 8, 5 /* e32 */ ... 
--- name: same_mask_undef_truepassthru @@ -148,16 +134,12 @@ body: | ; CHECK: liveins: $v8, $v0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %false:vrnov0 = COPY $v8 - ; CHECK-NEXT: %mask:vr = COPY $v0 - ; CHECK-NEXT: $v0 = COPY %mask - ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 5 /* e32 */, 1 /* ta, mu */ - ; CHECK-NEXT: $v0 = COPY %mask + ; CHECK-NEXT: %mask:vmv0 = COPY $v0 + ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 1 /* ta, mu */ %false:vr = COPY $v8 - %mask:vr = COPY $v0 - $v0 = COPY %mask - %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */ - $v0 = COPY %mask - %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, 4, 5 /* e32 */ + %mask:vmv0 = COPY $v0 + %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */ + %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, %mask, 4, 5 /* e32 */ ... --- # Shouldn't be converted because true is in a different block @@ -169,19 +151,15 @@ body: | ; CHECK-NEXT: liveins: $v8, $v0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %false:vr = COPY $v8 - ; CHECK-NEXT: %mask:vr = COPY $v0 - ; CHECK-NEXT: $v0 = COPY %mask - ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: %mask:vmv0 = COPY $v0 + ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: $v0 = COPY %mask - ; CHECK-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, 4, 5 /* e32 */ + ; CHECK-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, %mask, 4, 5 /* e32 */ bb.0: liveins: $v8, $v0 %false:vr = COPY $v8 - %mask:vr = COPY $v0 - $v0 = COPY %mask - %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 
/* tu, mu */ + %mask:vmv0 = COPY $v0 + %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */ bb.1: - $v0 = COPY %mask - %5:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, 4, 5 /* e32 */ + %5:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, %mask, 4, 5 /* e32 */ diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll index c01cbf49483b7..0c058b562f53d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll @@ -15,8 +15,8 @@ define void @vpmerge_vpload_store( %passthru, ptr %p, ) into %ir.p) ; CHECK-NEXT: PseudoRET %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, splat (i1 -1), i32 %vl) @@ -34,8 +34,8 @@ define void @vpselect_vpload_store( %passthru, ptr %p, ) into %ir.p) ; CHECK-NEXT: PseudoRET %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, splat (i1 -1), i32 %vl) diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll index e6272701a6033..92d2562168ac7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll @@ -1358,19 +1358,17 @@ define @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @strided_vpload_nxv1i8_i8(ptr %ptr, i8 signext %stride, ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x11 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: $v0 = COPY [[COPY1]] - ; CHECK-NEXT: [[PseudoVLSE8_V_MF8_MASK:%[0-9]+]]:vrnov0 = PseudoVLSE8_V_MF8_MASK $noreg, [[COPY3]], 
[[COPY2]], $v0, [[COPY]], 3 /* e8 */, 1 /* ta, mu */ :: (load unknown-size, align 1) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vmv0 = COPY [[COPY1]] + ; CHECK-NEXT: [[PseudoVLSE8_V_MF8_MASK:%[0-9]+]]:vrnov0 = PseudoVLSE8_V_MF8_MASK $noreg, [[COPY3]], [[COPY2]], [[COPY4]], [[COPY]], 3 /* e8 */, 1 /* ta, mu */ :: (load unknown-size, align 1) ; CHECK-NEXT: $v8 = COPY [[PseudoVLSE8_V_MF8_MASK]] ; CHECK-NEXT: PseudoRET implicit $v8 %load = call @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr %ptr, i8 %stride, %m, i32 %evl) @@ -36,8 +36,8 @@ define void @strided_vpstore_nxv1i8_i8( %val, ptr %ptr, i8 sign ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x11 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vr = COPY $v8 - ; CHECK-NEXT: $v0 = COPY [[COPY1]] - ; CHECK-NEXT: PseudoVSSE8_V_MF8_MASK [[COPY4]], [[COPY3]], [[COPY2]], $v0, [[COPY]], 3 /* e8 */ :: (store unknown-size, align 1) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vmv0 = COPY [[COPY1]] + ; CHECK-NEXT: PseudoVSSE8_V_MF8_MASK [[COPY4]], [[COPY3]], [[COPY2]], [[COPY5]], [[COPY]], 3 /* e8 */ :: (store unknown-size, align 1) ; CHECK-NEXT: PseudoRET call void @llvm.experimental.vp.strided.store.nxv1i8.p0.i8( %val, ptr %ptr, i8 %stride, %m, i32 %evl) ret void diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll index ecd098edb30ae..60b29c98eb665 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -769,24 +769,24 @@ define @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, %data, <16 x i8> %mask, i8 %passthru) { ; CHECK-LABEL: extract_last_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: 
vid.v v9, v0.t ; CHECK-NEXT: beqz a1, .LBB0_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -29,12 +30,12 @@ define i16 @extract_last_i16(<8 x i16> %data, <8 x i16> %mask, i16 %passthru) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu -; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: beqz a1, .LBB1_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -53,12 +54,12 @@ define i32 @extract_last_i32(<4 x i32> %data, <4 x i32> %mask, i32 %passthru) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu -; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: beqz a1, .LBB2_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -77,14 +78,14 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vmsne.vi v0, v9, 0 -; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vcpop.m a2, v0 -; RV32-NEXT: vid.v v9, v0.t ; RV32-NEXT: beqz a2, .LBB3_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: vredmaxu.vs v9, v9, v9 +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, mu +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: li a1, 32 +; 
RV32-NEXT: vid.v v9, v0.t +; RV32-NEXT: vredmaxu.vs v9, v9, v9 ; RV32-NEXT: vmv.x.s a0, v9 ; RV32-NEXT: andi a0, a0, 255 ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma @@ -100,12 +101,12 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vmsne.vi v0, v9, 0 -; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: vcpop.m a1, v0 -; RV64-NEXT: vid.v v9, v0.t ; RV64-NEXT: beqz a1, .LBB3_2 ; RV64-NEXT: # %bb.1: +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, mu +; RV64-NEXT: vmv.v.i v9, 0 +; RV64-NEXT: vid.v v9, v0.t ; RV64-NEXT: vredmaxu.vs v9, v9, v9 ; RV64-NEXT: vmv.x.s a0, v9 ; RV64-NEXT: andi a0, a0, 255 @@ -124,12 +125,12 @@ define float @extract_last_float(<4 x float> %data, <4 x i32> %mask, float %pass ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu -; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: beqz a0, .LBB4_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -148,12 +149,12 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: beqz a0, .LBB5_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -170,12 +171,13 @@ define double 
@extract_last_double(<2 x double> %data, <2 x i64> %mask, double % define i8 @extract_last_i8_scalable( %data, %mask, i8 %passthru) { ; CHECK-LABEL: extract_last_i8_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a1, .LBB6_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, mu +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 @@ -191,12 +193,13 @@ define i8 @extract_last_i8_scalable( %data, define i16 @extract_last_i16_scalable( %data, %mask, i16 %passthru) { ; CHECK-LABEL: extract_last_i16_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a1, .LBB7_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 @@ -212,12 +215,13 @@ define i16 @extract_last_i16_scalable( %data, %data, %mask, i32 %passthru) { ; CHECK-LABEL: extract_last_i32_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a1, .LBB8_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 @@ -233,14 +237,15 @@ define i32 @extract_last_i32_scalable( %data, %data, %mask, i64 
%passthru) { ; RV32-LABEL: extract_last_i64_scalable: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, mu -; RV32-NEXT: vmv.v.i v10, 0 +; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV32-NEXT: vcpop.m a2, v0 -; RV32-NEXT: vid.v v10, v0.t ; RV32-NEXT: beqz a2, .LBB9_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: vredmaxu.vs v10, v10, v10 +; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, mu +; RV32-NEXT: vid.v v10, v0.t +; RV32-NEXT: vredmaxu.vs v10, v10, v10 ; RV32-NEXT: vmv.x.s a0, v10 ; RV32-NEXT: andi a0, a0, 255 ; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma @@ -254,12 +259,13 @@ define i64 @extract_last_i64_scalable( %data, %data, %data, %mask, float %passthru) { ; CHECK-LABEL: extract_last_float_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a0, .LBB10_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 @@ -296,12 +303,13 @@ define float @extract_last_float_scalable( %data, %data, %mask, double %passthru) { ; CHECK-LABEL: extract_last_double_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a0, .LBB11_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll b/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll 
index 6435c1c14e061..fd1dbab2362a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll @@ -222,9 +222,9 @@ define @vadd_vv_mask_negative( %0, @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll index 9e78bbdc4f441..6831d1fb63cae 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -429,16 +429,16 @@ define @vfadd_vv_nxv32bf16( %va, @vfadd_vv_nxv32f16( %va, @vfdiv_vv_nxv32bf16( %va, @vfdiv_vv_nxv32f16( %va, @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a2, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmv8r.v v24, v8 -; ZVFHMIN-NEXT: vl8re16.v v8, (a0) +; ZVFHMIN-NEXT: vl8re16.v v24, (a0) ; ZVFHMIN-NEXT: lui a2, 8 +; ZVFHMIN-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a3, vlenb ; ZVFHMIN-NEXT: slli a0, a3, 1 @@ -8516,25 +8515,25 @@ define @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a3 ; ZVFHMIN-NEXT: sltu a3, a1, a4 ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v16, v8, a2 +; ZVFHMIN-NEXT: vxor.vx v16, v24, a2 ; ZVFHMIN-NEXT: addi a3, a3, -1 ; ZVFHMIN-NEXT: and a3, a3, a4 -; ZVFHMIN-NEXT: vmv4r.v v8, v16 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: 
addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv4r.v v16, v8 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 @@ -8543,35 +8542,32 @@ define @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: bltu a1, a0, .LBB281_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a0 ; ZVFHMIN-NEXT: .LBB281_2: -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 
; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -8579,15 +8575,12 @@ define @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24 +; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb @@ -10079,36 +10072,34 @@ define @vfnmadd_vf_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: addi a3, a3, -1 ; ZVFHMIN-NEXT: and a3, a3, a4 ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: addi 
a4, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vmv4r.v v16, v8 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t @@ -10119,34 +10110,31 @@ define @vfnmadd_vf_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB292_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, 
a0, 3 -; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 -; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma @@ -10307,6 +10295,7 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v7, v0 +; ZVFHMIN-NEXT: vmv8r.v v24, v16 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: add a1, sp, a1 @@ -10315,11 +10304,11 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: lui a2, 8 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: vmv.v.x v24, a1 +; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 -; ZVFHMIN-NEXT: vxor.vx v8, v24, a2, v0.t -; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v8, v16, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v24, v24, a2, v0.t ; ZVFHMIN-NEXT: sub a2, a0, a1 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 @@ -10330,14 +10319,15 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: 
slli a3, a3, 4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv4r.v v16, v8 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 @@ -10345,7 +10335,7 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 @@ -10481,7 +10471,7 @@ define @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfmax_vv_nxv32bf16( %va, @vfmax_vv_nxv32f16( %va, @vfmin_vv_nxv32bf16( %va, @vfmin_vv_nxv32f16( %va, @vfmul_vv_nxv32f16( %va, @llvm.vp.fptrunc.nxv16f64.nxv16f32( @vfptrunc_nxv16f32_nxv16f64( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vfptrunc_nxv16f32_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 
0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: sub a3, a0, a1 @@ -113,24 +105,16 @@ define @vfptrunc_nxv16f32_nxv16f64( ; CHECK-NEXT: sltu a2, a0, a3 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t +; CHECK-NEXT: vfncvt.f.f.w v28, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB7_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call @llvm.vp.fptrunc.nxv16f64.nxv16f32( %a, %m, i32 %vl) ret %v @@ -144,58 +128,68 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; 
CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a3, a1, 3 -; CHECK-NEXT: srli a5, a1, 2 -; CHECK-NEXT: slli a6, a1, 3 -; CHECK-NEXT: slli a4, a1, 1 -; CHECK-NEXT: vslidedown.vx v16, v0, a5 -; CHECK-NEXT: add a6, a0, a6 -; CHECK-NEXT: sub a5, a2, a4 -; CHECK-NEXT: vl8re64.v v24, (a6) -; CHECK-NEXT: sltu a6, a2, a5 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: sub a6, a5, a1 -; CHECK-NEXT: sltu a7, a5, a6 -; CHECK-NEXT: addi a7, a7, -1 ; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v16, a3 -; CHECK-NEXT: and a0, a7, a6 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t -; CHECK-NEXT: bltu a5, a1, .LBB8_2 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: srli a5, a1, 3 +; CHECK-NEXT: slli a4, a1, 3 +; CHECK-NEXT: slli a3, a1, 1 +; CHECK-NEXT: add a6, a0, a4 +; CHECK-NEXT: sub a0, a2, a3 +; CHECK-NEXT: sltu a4, a2, a0 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a0, a4, a0 +; CHECK-NEXT: sub a4, a0, a1 +; CHECK-NEXT: sltu a7, a0, a4 +; CHECK-NEXT: addi a7, a7, -1 +; CHECK-NEXT: and a4, a7, a4 +; CHECK-NEXT: srli a7, a1, 2 +; CHECK-NEXT: vl8re64.v v8, (a6) +; CHECK-NEXT: vslidedown.vx v16, v0, a7 +; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v25, v0, a5 +; CHECK-NEXT: vslidedown.vx v0, v16, a5 +; CHECK-NEXT: bltu a0, a1, .LBB8_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a5, a1 
+; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB8_2: +; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma +; CHECK-NEXT: vfncvt.f.f.w v20, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v6, v7, a3 -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; CHECK-NEXT: addi a4, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t -; CHECK-NEXT: bltu a2, a4, .LBB8_4 +; CHECK-NEXT: bltu a2, a3, .LBB8_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a2, a4 +; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB8_4: ; CHECK-NEXT: sub a0, a2, a1 ; CHECK-NEXT: sltu a3, a2, a0 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a0, a3, a0 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 ; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v28, v8, v0.t @@ -203,9 +197,9 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB8_6: -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -213,7 +207,8 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll index 059408a1c9c3f..056c7557440e0 100644 
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll @@ -391,16 +391,16 @@ define @vfsub_vv_nxv32bf16( %va, @vfsub_vv_nxv32f16( %va, %maskedoff, ptr %p, @vpgather_baseidx_nxv32i8(ptr %base, @vpload_nxv17f64(ptr %ptr, ptr %out, %v, %val, %m, i32 zeroext %evl) { ; CHECK-LABEL: vreduce_fminimum_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: feq.s a1, fa0, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfredmin.vs v10, v8, v10, v0.t -; CHECK-NEXT: vmfne.vv v11, v8, v8, v0.t -; CHECK-NEXT: vcpop.m a0, v11, v0.t +; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t +; CHECK-NEXT: feq.s a1, fa0, fa0 +; CHECK-NEXT: vcpop.m a2, v10, v0.t ; CHECK-NEXT: xori a1, a1, 1 -; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: beqz a0, .LBB22_2 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: beqz a1, .LBB22_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB22_2: +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v10, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v10, v8, v10, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v10 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fminimum.nxv4f32(float %start, %val, %m, i32 %evl) @@ -371,21 +372,22 @@ define float @vreduce_fminimum_nxv4f32(float %start, %val, define float @vreduce_fmaximum_nxv4f32(float %start, %val, %m, i32 zeroext %evl) { ; CHECK-LABEL: vreduce_fmaximum_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: feq.s a1, fa0, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfredmax.vs v10, v8, v10, v0.t -; CHECK-NEXT: vmfne.vv v11, v8, v8, v0.t -; CHECK-NEXT: vcpop.m a0, v11, v0.t +; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t +; CHECK-NEXT: feq.s a1, fa0, fa0 +; CHECK-NEXT: vcpop.m a2, v10, v0.t ; CHECK-NEXT: xori a1, a1, 
1 -; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: beqz a0, .LBB23_2 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: beqz a1, .LBB23_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB23_2: +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v10, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v10, v8, v10, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v10 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fmaximum.nxv4f32(float %start, %val, %m, i32 %evl) @@ -421,21 +423,22 @@ define float @vreduce_fmaximum_nnan_nxv4f32(float %start, % define float @vreduce_fminimum_v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vreduce_fminimum_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: feq.s a1, fa0, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t -; CHECK-NEXT: vcpop.m a0, v8, v0.t +; CHECK-NEXT: vmfne.vv v9, v8, v8, v0.t +; CHECK-NEXT: feq.s a1, fa0, fa0 +; CHECK-NEXT: vcpop.m a2, v9, v0.t ; CHECK-NEXT: xori a1, a1, 1 -; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: beqz a0, .LBB26_2 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: beqz a1, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfredmin.vs v9, v8, v9, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fminimum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl) @@ -445,21 +448,22 @@ define float @vreduce_fminimum_v4f32(float %start, <4 x float> %val, <4 x i1> %m define float @vreduce_fmaximum_v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 zeroext 
%evl) { ; CHECK-LABEL: vreduce_fmaximum_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: feq.s a1, fa0, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t -; CHECK-NEXT: vcpop.m a0, v8, v0.t +; CHECK-NEXT: vmfne.vv v9, v8, v8, v0.t +; CHECK-NEXT: feq.s a1, fa0, fa0 +; CHECK-NEXT: vcpop.m a2, v9, v0.t ; CHECK-NEXT: xori a1, a1, 1 -; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: beqz a0, .LBB27_2 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: beqz a1, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfredmax.vs v9, v8, v9, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fmaximum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll index fd5bf4ebcede8..32d24778d7327 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll @@ -285,58 +285,68 @@ define @vtrunc_nxv32i64_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, Date: Fri, 31 Jan 2025 14:57:05 +0800 Subject: [PATCH 2/4] Move pass to just after RISCVVectorPeephole --- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 8 +- llvm/test/CodeGen/RISCV/O0-pipeline.ll | 2 +- llvm/test/CodeGen/RISCV/O3-pipeline.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll | 44 +- llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll | 53 +- llvm/test/CodeGen/RISCV/rvv/commutable.ll | 69 +- llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll | 82 +- llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll | 79 +- llvm/test/CodeGen/RISCV/rvv/expandload.ll | 42 +- 
.../RISCV/rvv/fixed-vectors-bswap-vp.ll | 32 +- .../RISCV/rvv/fixed-vectors-ctlz-vp.ll | 150 ++- .../RISCV/rvv/fixed-vectors-ctpop-vp.ll | 105 +- .../RISCV/rvv/fixed-vectors-cttz-vp.ll | 316 ++---- .../RISCV/rvv/fixed-vectors-fmaximum-vp.ll | 26 +- .../RISCV/rvv/fixed-vectors-fminimum-vp.ll | 26 +- .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 32 +- .../rvv/fixed-vectors-interleaved-access.ll | 211 ++-- .../RISCV/rvv/fixed-vectors-rint-vp.ll | 47 +- .../RISCV/rvv/fixed-vectors-trunc-vp.ll | 952 ++++++------------ .../CodeGen/RISCV/rvv/fixed-vectors-vpload.ll | 34 +- .../RISCV/rvv/fixed-vectors-vselect-vp.ll | 30 +- .../RISCV/rvv/fixed-vectors-vselect.ll | 24 +- llvm/test/CodeGen/RISCV/rvv/floor-vp.ll | 53 +- .../test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll | 33 +- llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll | 60 +- .../test/CodeGen/RISCV/rvv/fminimum-sdnode.ll | 33 +- llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll | 60 +- llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll | 173 ++-- llvm/test/CodeGen/RISCV/rvv/round-vp.ll | 53 +- llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll | 53 +- llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll | 53 +- llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll | 152 ++- llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll | 60 +- .../RISCV/rvv/vector-extract-last-active.ll | 96 +- .../RISCV/rvv/vector-reassociations.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vector-splice.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll | 602 ++++++----- llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll | 26 +- llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll | 26 +- llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll | 103 +- llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll | 16 +- .../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vpload.ll | 18 +- 
llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 20 +- .../CodeGen/RISCV/rvv/vreductions-fp-vp.ll | 68 +- llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll | 26 +- llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll | 20 +- llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll | 20 +- llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll | 77 +- llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll | 26 +- 55 files changed, 2018 insertions(+), 2363 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index b2b77584c414d..772b8541a8bf0 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -588,6 +588,8 @@ void RISCVPassConfig::addPreEmitPass2() { void RISCVPassConfig::addMachineSSAOptimization() { addPass(createRISCVVectorPeepholePass()); + // TODO: Move this to pre regalloc + addPass(createRISCVVMV0EliminationPass()); TargetPassConfig::addMachineSSAOptimization(); @@ -600,6 +602,10 @@ void RISCVPassConfig::addMachineSSAOptimization() { } void RISCVPassConfig::addPreRegAlloc() { + // TODO: Move this as late as possible before regalloc + if (TM->getOptLevel() == CodeGenOptLevel::None) + addPass(createRISCVVMV0EliminationPass()); + addPass(createRISCVPreRAExpandPseudoPass()); if (TM->getOptLevel() != CodeGenOptLevel::None) { addPass(createRISCVMergeBaseOffsetOptPass()); @@ -613,8 +619,6 @@ void RISCVPassConfig::addPreRegAlloc() { if (TM->getOptLevel() != CodeGenOptLevel::None && EnableMachinePipeliner) addPass(&MachinePipelinerID); - - addPass(createRISCVVMV0EliminationPass()); } void RISCVPassConfig::addFastRegAlloc() { diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll index f93cb65897210..a50c303819f23 100644 --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -39,11 +39,11 @@ ; CHECK-NEXT: RISC-V DAG->DAG Pattern Instruction Selection ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions ; CHECK-NEXT: Local Stack 
Slot Allocation +; CHECK-NEXT: RISC-V VMV0 Elimination ; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass ; CHECK-NEXT: RISC-V Insert Write VXRM Pass ; CHECK-NEXT: RISC-V Landing Pad Setup -; CHECK-NEXT: RISC-V VMV0 Elimination ; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Eliminate PHI nodes for register allocation ; CHECK-NEXT: Two-Address instruction pass diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index a2b5e9c86a107..2646dfeca4eb6 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -97,6 +97,7 @@ ; CHECK-NEXT: RISC-V DAG->DAG Pattern Instruction Selection ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions ; CHECK-NEXT: RISC-V Vector Peephole Optimization +; CHECK-NEXT: RISC-V VMV0 Elimination ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Early Tail Duplication ; CHECK-NEXT: Optimize machine instruction PHIs @@ -127,7 +128,6 @@ ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass ; CHECK-NEXT: RISC-V Insert Write VXRM Pass ; CHECK-NEXT: RISC-V Landing Pad Setup -; CHECK-NEXT: RISC-V VMV0 Elimination ; CHECK-NEXT: Detect Dead Lanes ; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Process Implicit Definitions diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll index 4d34621cd5f24..68f3de89f1f5a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -3065,18 +3065,18 @@ define @vp_bitreverse_nxv64i16( %va, @vp_bitreverse_nxv64i16( %va, @vp_bitreverse_nxv64i16( %va, @vp_ceil_vv_nxv16f64( %va, < ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 
0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI44_0) ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: vslidedown.vx v25, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfabs.v v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 3 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: 
addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/commutable.ll b/llvm/test/CodeGen/RISCV/rvv/commutable.ll index 5f35626120178..e26c467f025bd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/commutable.ll +++ b/llvm/test/CodeGen/RISCV/rvv/commutable.ll @@ -26,9 +26,10 @@ define @commutable_vadd_vv_masked( %0, @llvm.riscv.vadd.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vadd.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -58,9 +59,10 @@ define @commutable_vand_vv_masked( %0, @llvm.riscv.vand.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vand.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -90,9 +92,10 @@ define @commutable_vor_vv_masked( %0, @llvm.riscv.vor.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call 
@llvm.riscv.vor.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -122,9 +125,10 @@ define @commutable_vxor_vv_masked( %0, @llvm.riscv.vxor.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vxor.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -154,9 +158,10 @@ define @commutable_vmseq_vv_masked( %0, @llvm.riscv.vmseq.mask.nxv1i64( undef, %0, %1, %mask, iXLen %2) %b = call @llvm.riscv.vmseq.mask.nxv1i64( undef, %1, %0, %mask, iXLen %2) @@ -186,9 +191,10 @@ define @commutable_vmsne_vv_masked( %0, @llvm.riscv.vmsne.mask.nxv1i64( undef, %0, %1, %mask, iXLen %2) %b = call @llvm.riscv.vmsne.mask.nxv1i64( undef, %1, %0, %mask, iXLen %2) @@ -218,9 +224,10 @@ define @commutable_vmin_vv_masked( %0, @llvm.riscv.vmin.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmin.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -250,9 +257,10 @@ define @commutable_vminu_vv_masked( %0, @llvm.riscv.vminu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vminu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -282,9 +290,10 @@ define @commutable_vmax_vv_masked( %0, @llvm.riscv.vmax.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmax.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -314,9 +323,10 @@ define @commutable_vmaxu_vv_masked( %0, @llvm.riscv.vmaxu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmaxu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -346,9 +356,10 @@ define @commutable_vmul_vv_masked( %0, @llvm.riscv.vmul.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmul.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -378,9 +389,10 @@ define @commutable_vmulh_vv_masked( %0, @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, 
iXLen 1) %b = call @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -410,9 +422,10 @@ define @commutable_vmulhu_vv_masked( %0, @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -443,8 +456,9 @@ define @commutable_vwadd_vv_masked( %0, @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.nxv1i32( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.nxv1i32( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -475,8 +489,9 @@ define @commutable_vwaddu_vv_masked( %0, @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.nxv1i32( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.nxv1i32( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -507,8 +522,9 @@ define @commutable_vwmul_vv_masked( %0, @llvm.riscv.vwmul.mask.nxv1i64.nxv1i32.nxv1i32( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vwmul.mask.nxv1i64.nxv1i32.nxv1i32( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -539,8 +555,9 @@ define @commutable_vwmulu_vv_masked( %0, @llvm.riscv.vwmulu.mask.nxv1i64.nxv1i32.nxv1i32( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vwmulu.mask.nxv1i64.nxv1i32.nxv1i32( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -620,9 +637,10 @@ define @commutable_vadc_vv( %0, @llvm.riscv.vadc.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2) @@ -653,9 +671,10 @@ define @commutable_vsadd_vv_masked( %0, @llvm.riscv.vsadd.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vsadd.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -685,9 +704,10 @@ define @commutable_vsaddu_vv_masked( %0, @llvm.riscv.vsaddu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vsaddu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -719,9 +739,10 @@ define @commutable_vaadd_vv_masked( %0, 
@llvm.riscv.vaadd.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen 0, iXLen %2, iXLen 1) %b = call @llvm.riscv.vaadd.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen 0, iXLen %2, iXLen 1) @@ -753,9 +774,10 @@ define @commutable_vaaddu_vv_masked( %0, @llvm.riscv.vaaddu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen 0, iXLen %2, iXLen 1) %b = call @llvm.riscv.vaaddu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen 0, iXLen %2, iXLen 1) @@ -787,9 +809,10 @@ define @commutable_vsmul_vv_masked( %0, @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen 0, iXLen %2, iXLen 1) %b = call @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen 0, iXLen %2, iXLen 1) diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll index 52ddd9ab2f832..d95b026a239c9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll @@ -2018,14 +2018,14 @@ define @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32> ; CHECK-RV64-NEXT: vrgather.vv v8, v16, v24, v0.t ; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-RV64-NEXT: viota.m v8, v7 +; CHECK-RV64-NEXT: viota.m v16, v7 +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: li a1, 24 +; CHECK-RV64-NEXT: mul a0, a0, a1 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vmv1r.v v0, v7 ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 4 @@ -1157,6 +1163,12 @@ define <64 x i32> 
@test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32> ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 ; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: li a1, 24 +; CHECK-RV64-NEXT: mul a0, a0, a1 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -1374,7 +1386,13 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64> ; CHECK-RV32-NEXT: vrgather.vv v8, v16, v24, v0.t ; CHECK-RV32-NEXT: addi a0, sp, 16 ; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-RV32-NEXT: viota.m v8, v7 +; CHECK-RV32-NEXT: viota.m v16, v7 +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: li a1, 24 +; CHECK-RV32-NEXT: mul a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: vmv1r.v v0, v7 ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 4 @@ -1386,6 +1404,12 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64> ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 ; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: li a1, 24 +; CHECK-RV32-NEXT: mul a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV32-NEXT: vrgather.vv v16, v24, v8, v0.t ; CHECK-RV32-NEXT: addi a0, sp, 16 ; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -1447,7 +1471,13 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64> ; CHECK-RV64-NEXT: 
vrgather.vv v8, v16, v24, v0.t ; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-RV64-NEXT: viota.m v8, v7 +; CHECK-RV64-NEXT: viota.m v16, v7 +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: li a1, 24 +; CHECK-RV64-NEXT: mul a0, a0, a1 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vmv1r.v v0, v7 ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 4 @@ -1459,6 +1489,12 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64> ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 ; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: li a1, 24 +; CHECK-RV64-NEXT: mul a0, a0, a1 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll index d765e4c0b8f6a..4f7bc5ca43ecb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -768,17 +768,17 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: li a2, 56 +; RV32-NEXT: lui a2, 1044480 +; RV32-NEXT: li a1, 56 ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: li a4, 40 ; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: sw 
zero, 12(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a2, v0.t -; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vand.vx v24, v8, a1, v0.t +; RV32-NEXT: vsll.vx v16, v8, a1, v0.t +; RV32-NEXT: addi a2, a3, -256 +; RV32-NEXT: vand.vx v24, v8, a2, v0.t ; RV32-NEXT: vsll.vx v24, v24, a4, v0.t ; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a3, vlenb @@ -814,9 +814,9 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV32-NEXT: vand.vx v24, v24, a1, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t ; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill @@ -1031,17 +1031,17 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: li a2, 56 +; RV32-NEXT: lui a2, 1044480 +; RV32-NEXT: li a1, 56 ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: li a4, 40 ; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a2, v0.t -; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vand.vx v24, v8, a1, v0.t +; RV32-NEXT: vsll.vx v16, v8, a1, v0.t +; RV32-NEXT: addi a2, a3, -256 +; RV32-NEXT: vand.vx v24, v8, a2, v0.t ; RV32-NEXT: vsll.vx v24, v24, a4, v0.t ; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a3, vlenb @@ -1077,9 +1077,9 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add 
a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV32-NEXT: vand.vx v24, v24, a1, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t ; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll index 9d0d42cf754c5..421105cfd110d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll @@ -2108,22 +2108,27 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v8, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a4), zero +; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, 
a4 @@ -2142,29 +2147,29 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v16, v16, v8, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 @@ -2196,14 +2201,14 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vlse64.v v8, (a4), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; 
RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -2219,14 +2224,14 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 5 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload @@ -2286,35 +2291,18 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; 
RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 @@ -2324,7 +2312,8 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2332,14 +2321,14 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -4814,22 +4803,27 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v8, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a4), zero +; RV32-NEXT: 
vlse64.v v16, (a4), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 @@ -4848,29 +4842,29 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v16, v16, v8, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; 
RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 @@ -4902,14 +4896,14 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vlse64.v v8, (a4), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -4925,14 +4919,14 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 5 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload @@ -4992,35 +4986,18 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; 
RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 @@ -5030,7 +5007,8 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -5038,14 +5016,14 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size 
Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll index a5a1061842427..fabce69bd2d60 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll @@ -1525,7 +1525,8 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -1557,89 +1558,63 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a2), zero ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 4 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: addi a2, sp, 32 -; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: vlse64.v v24, (a2), zero ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vsub.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: li a3, 40 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, 
v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v8, v16, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v8, v24, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vadd.vv v16, v16, v8, v0.t ; RV32-NEXT: addi a2, sp, 24 ; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a2), zero -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vlse64.v v8, (a2), zero ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: slli a2, a2, 5 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, 
a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v24, v0.t -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: addi a2, sp, 48 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV32-NEXT: addi a2, a0, -16 ; RV32-NEXT: sltu a0, a0, a2 @@ -1647,22 +1622,23 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v24, v8, v0.t +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: vsub.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a2, 40 
+; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -1672,21 +1648,20 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll index 5f275da1740cb..b7a53010ab6de 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -1769,54 +1769,39 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: 
vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV32-NEXT: vsrl.vi v16, v24, 1, v0.t +; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vsub.vv v16, v24, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v24, v16, v24, v0.t -; RV32-NEXT: vand.vv v16, v24, v8, v0.t +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, 
vlenb ; RV32-NEXT: slli a3, a3, 5 @@ -1824,16 +1809,16 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: addi a3, sp, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vlse64.v v8, (a4), zero ; RV32-NEXT: csrr a3, vlenb @@ -1841,10 +1826,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: addi a3, sp, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t @@ -1858,10 +1840,6 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, a0, -16 ; RV32-NEXT: sltu a0, a0, a3 @@ -1880,8 +1858,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v24, v8, 
1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -1892,44 +1869,18 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vadd.vv v8, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size 
Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 @@ -1938,10 +1889,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 @@ -1968,34 +1916,34 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: li a1, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a4, a0 +; RV64-NEXT: mv a2, a0 ; RV64-NEXT: bltu a0, a1, .LBB34_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a4, 16 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB34_2: ; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: lui a3, 349525 +; RV64-NEXT: lui a4, 209715 ; RV64-NEXT: lui a5, 61681 ; RV64-NEXT: lui a6, 4112 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: addiw a3, a3, 1365 +; RV64-NEXT: addiw a4, a4, 819 ; RV64-NEXT: addiw a7, a5, -241 ; RV64-NEXT: addiw t0, a6, 257 -; RV64-NEXT: slli a6, a2, 32 -; RV64-NEXT: add a6, a2, a6 -; RV64-NEXT: slli a5, a3, 32 -; RV64-NEXT: add a5, a3, a5 -; RV64-NEXT: slli a2, a7, 32 -; RV64-NEXT: add a2, a7, a2 -; RV64-NEXT: slli a3, t0, 32 -; RV64-NEXT: add a3, t0, a3 +; RV64-NEXT: slli a6, a3, 32 +; RV64-NEXT: add a6, a3, a6 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a5, a4, a5 +; RV64-NEXT: slli a3, a7, 32 +; RV64-NEXT: add a3, a7, a3 +; RV64-NEXT: slli a4, t0, 32 +; RV64-NEXT: add a4, t0, a4 ; RV64-NEXT: addi a7, a0, -16 ; RV64-NEXT: sltu a0, a0, a7 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a7, a0, a7 ; RV64-NEXT: li a0, 56 -; 
RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t @@ -2008,17 +1956,17 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t +; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: addi a4, sp, 16 -; RV64-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 3 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 16 -; RV64-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t @@ -2032,8 +1980,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t +; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -3962,54 +3910,39 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: 
vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV32-NEXT: vsrl.vi v16, v24, 1, v0.t +; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vsub.vv v16, v24, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v24, v16, v24, v0.t -; RV32-NEXT: vand.vv v16, v24, v8, v0.t +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: 
slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 @@ -4017,16 +3950,16 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: addi a3, sp, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vlse64.v v8, (a4), zero ; RV32-NEXT: csrr a3, vlenb @@ -4034,10 +3967,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: addi a3, sp, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t @@ -4051,10 +3981,6 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: 
add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, a0, -16 ; RV32-NEXT: sltu a0, a0, a3 @@ -4073,8 +3999,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -4085,44 +4010,18 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vadd.vv v8, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; 
RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 @@ -4131,10 +4030,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 @@ -4161,34 +4057,34 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: li a1, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a4, a0 +; RV64-NEXT: mv a2, a0 ; RV64-NEXT: bltu a0, a1, .LBB70_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a4, 16 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB70_2: ; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: lui a3, 349525 +; RV64-NEXT: lui a4, 209715 ; RV64-NEXT: lui a5, 61681 ; RV64-NEXT: lui a6, 4112 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: addiw a3, a3, 1365 +; RV64-NEXT: addiw a4, a4, 819 ; RV64-NEXT: addiw a7, a5, -241 ; RV64-NEXT: addiw t0, a6, 257 -; RV64-NEXT: slli a6, a2, 32 -; RV64-NEXT: add a6, a2, a6 -; RV64-NEXT: slli a5, a3, 32 -; RV64-NEXT: add a5, a3, a5 -; RV64-NEXT: slli a2, a7, 32 -; RV64-NEXT: add a2, a7, a2 -; RV64-NEXT: slli a3, t0, 32 -; RV64-NEXT: add a3, t0, a3 +; RV64-NEXT: slli a6, a3, 32 +; RV64-NEXT: add 
a6, a3, a6 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a5, a4, a5 +; RV64-NEXT: slli a3, a7, 32 +; RV64-NEXT: add a3, a7, a3 +; RV64-NEXT: slli a4, t0, 32 +; RV64-NEXT: add a4, t0, a4 ; RV64-NEXT: addi a7, a0, -16 ; RV64-NEXT: sltu a0, a0, a7 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a7, a0, a7 ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t @@ -4201,17 +4097,17 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t +; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: addi a4, sp, 16 -; RV64-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 3 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 16 -; RV64-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t @@ -4225,8 +4121,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t +; RV64-NEXT: vmul.vx v8, v8, a4, v0.t 
; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll index 4f11e6c3c386a..12f325b8e78bf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll @@ -680,10 +680,10 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v16, v8, v16, v0.t +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 @@ -724,7 +724,7 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: vle64.v v16, (a0) ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB25_2 @@ -733,11 +733,11 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vmfeq.vv v7, v16, v16 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v 
v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a2, -16 @@ -748,19 +748,19 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v0, v24, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v7, v8, v8 -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 -; CHECK-NEXT: vfmax.vv v16, v16, v24 +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 +; CHECK-NEXT: vfmax.vv v16, v24, v16 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll index 2e2103ad5e06d..5b6759d066cd3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll @@ -680,10 +680,10 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v16, v8, v16, v0.t +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul 
a0, a0, a1 @@ -724,7 +724,7 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: vle64.v v16, (a0) ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB25_2 @@ -733,11 +733,11 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vmfeq.vv v7, v16, v16 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a2, -16 @@ -748,19 +748,19 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v0, v24, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v7, v8, v8 -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 -; CHECK-NEXT: vfmin.vv v16, v16, v24 +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 +; CHECK-NEXT: vfmin.vv v16, v24, v16 ; CHECK-NEXT: addi a0, sp, 
16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index d33049bac14db..585a331e55094 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -3906,9 +3906,10 @@ define void @trunc_v6bf16(ptr %x) { ; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -3970,8 +3971,10 @@ define void @trunc_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI172_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI172_0)(a1) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -3987,9 +3990,10 @@ define void @trunc_v6f16(ptr %x) { ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 +; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -4082,10 +4086,11 @@ define void @ceil_v6bf16(ptr %x) { ; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, 
ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a1, 3 +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4152,9 +4157,11 @@ define void @ceil_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI178_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI178_0)(a1) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 3 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4171,10 +4178,11 @@ define void @ceil_v6f16(ptr %x) { ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a1, 3 +; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4272,10 +4280,11 @@ define void @floor_v6bf16(ptr %x) { ; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a1, 2 +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4342,9 +4351,11 @@ define void @floor_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, 
%hi(.LCPI184_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI184_0)(a1) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 2 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4361,10 +4372,11 @@ define void @floor_v6f16(ptr %x) { ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a1, 2 +; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4462,10 +4474,11 @@ define void @round_v6bf16(ptr %x) { ; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a1, 4 +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4532,9 +4545,11 @@ define void @round_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI190_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI190_0)(a1) +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 4 +; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4551,10 +4566,11 @@ define void @round_v6f16(ptr %x) { ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a1, 4 +; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index 1516c67bf7ecc..6a68065bb059a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -655,10 +655,10 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 88 +; RV64-NEXT: li a3, 92 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 88 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xdc, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 92 * vlenb ; RV64-NEXT: addi a3, a1, 128 ; RV64-NEXT: addi a6, a1, 256 ; RV64-NEXT: li a4, 128 @@ -678,20 +678,20 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vrgather.vi v4, v8, 4 ; RV64-NEXT: vrgather.vi v20, v8, 5 ; RV64-NEXT: csrr a7, vlenb -; RV64-NEXT: li t0, 84 +; RV64-NEXT: li t0, 88 ; RV64-NEXT: mul a7, a7, t0 ; RV64-NEXT: add a7, sp, a7 ; RV64-NEXT: addi a7, a7, 16 ; RV64-NEXT: vs4r.v v20, (a7) # Unknown-size Folded Spill ; RV64-NEXT: vrgatherei16.vv v20, v8, v16 ; RV64-NEXT: csrr a7, vlenb -; RV64-NEXT: slli a7, a7, 6 +; RV64-NEXT: slli a7, a7, 5 ; RV64-NEXT: add a7, sp, a7 ; RV64-NEXT: addi a7, a7, 16 ; RV64-NEXT: vs4r.v v20, (a7) # 
Unknown-size Folded Spill ; RV64-NEXT: vrgatherei16.vv v20, v8, v17 ; RV64-NEXT: csrr a7, vlenb -; RV64-NEXT: li t0, 56 +; RV64-NEXT: li t0, 76 ; RV64-NEXT: mul a7, a7, t0 ; RV64-NEXT: add a7, sp, a7 ; RV64-NEXT: addi a7, a7, 16 @@ -712,15 +712,10 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vs4r.v v16, (a7) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 8 -; RV64-NEXT: csrr a7, vlenb -; RV64-NEXT: li t0, 40 -; RV64-NEXT: mul a7, a7, t0 -; RV64-NEXT: add a7, sp, a7 -; RV64-NEXT: addi a7, a7, 16 -; RV64-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill ; RV64-NEXT: vmv.s.x v0, a4 ; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 5 +; RV64-NEXT: li a7, 48 +; RV64-NEXT: mul a4, a4, a7 ; RV64-NEXT: add a4, sp, a4 ; RV64-NEXT: addi a4, a4, 16 ; RV64-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill @@ -728,7 +723,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vle64.v v24, (a1) ; RV64-NEXT: vle64.v v16, (a3) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 76 +; RV64-NEXT: li a3, 56 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -743,15 +738,20 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vrgather.vi v4, v8, 2, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 60 +; RV64-NEXT: li a3, 80 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: vmv8r.v v8, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 48 +; RV64-NEXT: li a3, 40 ; RV64-NEXT: mul a1, a1, a3 ; 
RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -782,18 +782,19 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vs2r.v v16, (a3) # Unknown-size Folded Spill ; RV64-NEXT: vmv.s.x v2, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: li a3, 48 +; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 84 +; RV64-NEXT: li a3, 88 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 40 +; RV64-NEXT: li a3, 80 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -801,14 +802,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vrgather.vi v24, v16, 3, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 84 +; RV64-NEXT: li a3, 88 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 76 +; RV64-NEXT: li a3, 56 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -833,89 +834,87 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: vmv.s.x v2, a3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 48 +; RV64-NEXT: li a3, 40 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vmerge.vvm v8, v16, v24, v0 -; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; 
RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vmv8r.v v8, v16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: li a3, 48 +; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v7, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vl1r.v v3, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv1r.v v0, v3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 40 +; RV64-NEXT: li a3, 80 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv4r.v v8, v16 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgather.vi v12, v16, 4, v0.t +; RV64-NEXT: vrgather.vi v4, v16, 4, v0.t +; RV64-NEXT: vmv1r.v v0, v2 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vmerge.vvm v16, v8, v24, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vmv1r.v v0, v2 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v3 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a3, 76 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded 
Spill -; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 56 +; RV64-NEXT: li a3, 80 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgather.vi v24, v8, 5, v0.t +; RV64-NEXT: vrgather.vi v12, v16, 5, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 56 +; RV64-NEXT: li a3, 76 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, 96 ; RV64-NEXT: li a3, 192 -; RV64-NEXT: vmv.s.x v3, a3 +; RV64-NEXT: vmv.s.x v1, a3 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.v.x v24, a1 -; RV64-NEXT: vmv1r.v v0, v3 +; RV64-NEXT: vmv.v.x v12, a1 +; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a3, 72 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgatherei16.vv v28, v8, v24, v0.t -; RV64-NEXT: vmv4r.v v16, v8 +; RV64-NEXT: vrgatherei16.vv v24, v16, v12, v0.t ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a3, 72 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, %hi(.LCPI8_2) ; RV64-NEXT: addi a1, a1, %lo(.LCPI8_2) ; RV64-NEXT: li a3, 1040 @@ -923,69 +922,73 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi a4, a4, 1 ; RV64-NEXT: vmv.s.x 
v0, a3 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.v.x v5, a4 +; RV64-NEXT: vmv.v.x v8, a4 +; RV64-NEXT: addi a3, sp, 16 +; RV64-NEXT: vs1r.v v8, (a3) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle16.v v6, (a1) +; RV64-NEXT: vle16.v v2, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 76 +; RV64-NEXT: li a3, 56 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 48 +; RV64-NEXT: li a3, 40 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmerge.vvm v24, v8, v24, v0 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmerge.vvm v24, v16, v8, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: li a3, 48 +; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vmv1r.v v0, v3 +; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a3, 68 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a3, 80 +; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl1r.v v20, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgatherei16.vv v28, v16, v5, v0.t +; RV64-NEXT: vrgatherei16.vv v24, v16, v20, v0.t ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a3, 68 ; RV64-NEXT: mul 
a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: addi a1, a2, -2016 ; RV64-NEXT: vmv.s.x v0, a1 -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v16, v24, v6 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vrgatherei16.vv v16, v24, v2 +; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 48 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 +; RV64-NEXT: li a2, 56 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vmerge.vvm v8, v16, v8, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 +; RV64-NEXT: li a2, 80 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -994,8 +997,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi a1, a1, %lo(.LCPI8_3) ; RV64-NEXT: vle16.v v8, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 60 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 6 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload @@ -1004,17 +1006,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: 
add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v12, v0 +; RV64-NEXT: vmv.v.v v12, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 60 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 6 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 84 +; RV64-NEXT: li a2, 88 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -1026,26 +1027,18 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vmv.v.v v12, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 84 +; RV64-NEXT: li a2, 88 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 6 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vmv4r.v v16, v4 +; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: vmv.v.v v16, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload @@ -1058,12 +1051,13 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi a1, a1, %lo(.LCPI8_5) ; RV64-NEXT: vle16.v v10, (a1) ; RV64-NEXT: csrr a1, vlenb -; 
RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: li a2, 56 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs2r.v v10, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 56 +; RV64-NEXT: li a2, 76 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -1071,7 +1065,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: vmv.v.v v12, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: li a2, 48 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload @@ -1086,13 +1081,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: vmv.v.v v8, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 +; RV64-NEXT: li a2, 80 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: li a2, 56 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl2r.v v20, (a1) # Unknown-size Folded Reload @@ -1117,21 +1113,20 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vse64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 64 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 84 +; RV64-NEXT: li a3, 88 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 60 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 6 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # 
Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 88 +; RV64-NEXT: li a1, 92 ; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: .cfi_def_cfa sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll index 266772d36ee9c..69e0f45ca4b0a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll @@ -517,8 +517,19 @@ declare <32 x double> @llvm.vp.rint.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v32f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: vmv1r.v v25, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 @@ -526,36 +537,34 @@ define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; 
CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v7 @@ -565,7 +574,7 @@ define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll index 037ed257f4a89..e9b676c2a6c22 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll @@ -222,645 +222,316 @@ define <2 x i32> @vtrunc_v2i32_v2i64_unmasked(<2 x i64> %a, i32 zeroext %vl) { declare <128 x i32> @llvm.vp.trunc.v128i32.v128i64(<128 x i64>, <128 x i1>, i32) define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 zeroext %vl) { -; RV32-LABEL: vtrunc_v128i32_v128i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw s0, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset s0, -4 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 72 -; RV32-NEXT: mul a2, a2, a3 -; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 72 * vlenb -; RV32-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; RV32-NEXT: vmv1r.v v7, v0 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a2, a2, a3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vslidedown.vi v5, v0, 8 -; RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; RV32-NEXT: vslidedown.vi v4, v0, 4 -; RV32-NEXT: addi a2, a7, -64 -; RV32-NEXT: vslidedown.vi v3, v5, 4 -; RV32-NEXT: sltu a3, a7, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a4, a3, a2 -; RV32-NEXT: addi a2, a4, -32 -; RV32-NEXT: sltu a3, a4, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a3, a3, a2 -; RV32-NEXT: li a2, 16 -; RV32-NEXT: addi t0, a3, -16 -; RV32-NEXT: mv a5, a3 -; RV32-NEXT: bltu a3, a2, .LBB16_2 -; RV32-NEXT: # 
%bb.1: -; RV32-NEXT: li a5, 16 -; RV32-NEXT: .LBB16_2: -; RV32-NEXT: li t2, 64 -; RV32-NEXT: sltu t1, a3, t0 -; RV32-NEXT: mv a6, a7 -; RV32-NEXT: bltu a7, t2, .LBB16_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: li a6, 64 -; RV32-NEXT: .LBB16_4: -; RV32-NEXT: addi t3, a1, 128 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v6, v4, 2 -; RV32-NEXT: addi s0, a1, 512 -; RV32-NEXT: addi t6, a1, 640 -; RV32-NEXT: vslidedown.vi v0, v3, 2 -; RV32-NEXT: addi t1, t1, -1 -; RV32-NEXT: addi t2, a1, 384 -; RV32-NEXT: vslidedown.vi v2, v5, 2 -; RV32-NEXT: li a3, 32 -; RV32-NEXT: addi t4, a6, -32 -; RV32-NEXT: sltu a6, a6, t4 -; RV32-NEXT: addi a6, a6, -1 -; RV32-NEXT: and a6, a6, t4 -; RV32-NEXT: addi t4, a6, -16 -; RV32-NEXT: sltu t5, a6, t4 -; RV32-NEXT: addi t5, t5, -1 -; RV32-NEXT: bltu a6, a2, .LBB16_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: li a6, 16 -; RV32-NEXT: .LBB16_6: -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle64.v v8, (s0) -; RV32-NEXT: csrr s0, vlenb -; RV32-NEXT: sw a0, 4(sp) # 4-byte Folded Spill -; RV32-NEXT: li a0, 56 -; RV32-NEXT: mul s0, s0, a0 -; RV32-NEXT: lw a0, 4(sp) # 4-byte Folded Reload -; RV32-NEXT: add s0, sp, s0 -; RV32-NEXT: addi s0, s0, 16 -; RV32-NEXT: vs8r.v v8, (s0) # Unknown-size Folded Spill -; RV32-NEXT: vle64.v v16, (t6) -; RV32-NEXT: vle64.v v8, (t3) -; RV32-NEXT: csrr t3, vlenb -; RV32-NEXT: slli t3, t3, 3 -; RV32-NEXT: add t3, sp, t3 -; RV32-NEXT: addi t3, t3, 16 -; RV32-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill -; RV32-NEXT: vle64.v v8, (a1) -; RV32-NEXT: csrr t3, vlenb -; RV32-NEXT: li t6, 48 -; RV32-NEXT: mul t3, t3, t6 -; RV32-NEXT: add t3, sp, t3 -; RV32-NEXT: addi t3, t3, 16 -; RV32-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill -; RV32-NEXT: vle64.v v8, (t2) -; RV32-NEXT: csrr t2, vlenb -; RV32-NEXT: slli t2, t2, 4 -; RV32-NEXT: add t2, sp, t2 -; RV32-NEXT: addi t2, t2, 16 -; RV32-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill -; RV32-NEXT: and t2, t1, t0 -; RV32-NEXT: and 
t1, t5, t4 -; RV32-NEXT: addi a1, a1, 256 -; RV32-NEXT: mv t0, a4 -; RV32-NEXT: bltu a4, a3, .LBB16_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: li t0, 32 -; RV32-NEXT: .LBB16_8: -; RV32-NEXT: vsetvli zero, t2, e32, m4, ta, ma -; RV32-NEXT: vnsrl.wi v8, v16, 0, v0.t -; RV32-NEXT: addi t2, sp, 16 -; RV32-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v3 -; RV32-NEXT: csrr t2, vlenb -; RV32-NEXT: li t3, 56 -; RV32-NEXT: mul t2, t2, t3 -; RV32-NEXT: add t2, sp, t2 -; RV32-NEXT: addi t2, t2, 16 -; RV32-NEXT: vl8r.v v24, (t2) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v6 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, t1, e32, m4, ta, ma -; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li t1, 24 -; RV32-NEXT: mul a5, a5, t1 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: addi a5, t0, -16 -; RV32-NEXT: sltu t0, t0, a5 -; RV32-NEXT: addi t0, t0, -1 -; RV32-NEXT: and a5, t0, a5 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle64.v v16, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v6, v7, 2 -; RV32-NEXT: vmv1r.v v0, v4 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li t0, 48 -; RV32-NEXT: mul a1, a1, t0 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, 
(a1) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a6, e32, m4, ta, ma -; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a6, 56 -; RV32-NEXT: mul a1, a1, a6 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v2 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; RV32-NEXT: vnsrl.wi v24, v16, 0, v0.t -; RV32-NEXT: bltu a4, a2, .LBB16_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: li a4, 16 -; RV32-NEXT: .LBB16_10: -; RV32-NEXT: vmv1r.v v0, v5 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a4, e32, m4, ta, ma -; RV32-NEXT: vnsrl.wi v16, v8, 0, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: mv a1, a7 -; RV32-NEXT: bltu a7, a3, .LBB16_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: li a1, 32 -; RV32-NEXT: .LBB16_12: -; RV32-NEXT: vmv1r.v v0, v6 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 6 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; 
RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vslideup.vi v24, v8, 16 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 6 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: addi a4, a1, -16 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 56 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v8, v16, 16 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 56 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 48 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v8, v16, 16 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 48 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; RV32-NEXT: sltu a1, a1, a4 -; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: and a1, a1, a4 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vnsrl.wi v8, v16, 0, v0.t -; RV32-NEXT: bltu a7, a2, .LBB16_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: li a7, 16 -; RV32-NEXT: .LBB16_14: -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a1, a1, a2 
-; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a7, e32, m4, ta, ma -; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vslideup.vi v16, v8, 16 -; RV32-NEXT: vse32.v v16, (a0) -; RV32-NEXT: addi a1, a0, 256 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 48 -; RV32-NEXT: mul a2, a2, a3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vse32.v v8, (a1) -; RV32-NEXT: addi a1, a0, 128 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 56 -; RV32-NEXT: mul a2, a2, a3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vse32.v v8, (a1) -; RV32-NEXT: addi a0, a0, 384 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 72 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 32 -; RV32-NEXT: lw s0, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: .cfi_restore s0 -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: .cfi_def_cfa_offset 0 -; RV32-NEXT: ret -; -; RV64-LABEL: vtrunc_v128i32_v128i64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -48 -; RV64-NEXT: .cfi_def_cfa_offset 48 -; RV64-NEXT: sd s0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset s0, -8 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 72 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 72 * vlenb -; RV64-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; RV64-NEXT: vmv1r.v v7, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 5 
-; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 32 -; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 40 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 32 -; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vslidedown.vi v5, v0, 8 -; RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vi v4, v0, 4 -; RV64-NEXT: addi a2, a7, -64 -; RV64-NEXT: vslidedown.vi v3, v5, 4 -; RV64-NEXT: sltu a3, a7, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a4, a3, a2 -; RV64-NEXT: addi a2, a4, -32 -; RV64-NEXT: sltu a3, a4, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a3, a3, a2 -; RV64-NEXT: li a2, 16 -; RV64-NEXT: addi t0, a3, -16 -; RV64-NEXT: mv a5, a3 -; RV64-NEXT: bltu a3, a2, .LBB16_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a5, 16 -; RV64-NEXT: .LBB16_2: -; RV64-NEXT: li t2, 64 -; RV64-NEXT: sltu t1, a3, t0 -; RV64-NEXT: mv a6, a7 -; RV64-NEXT: bltu a7, t2, .LBB16_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: li a6, 64 -; RV64-NEXT: .LBB16_4: -; RV64-NEXT: addi t3, a1, 128 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v6, v4, 2 -; RV64-NEXT: addi s0, a1, 512 -; RV64-NEXT: addi t6, a1, 640 -; RV64-NEXT: vslidedown.vi v0, v3, 2 -; RV64-NEXT: addi t1, t1, -1 -; RV64-NEXT: addi t2, a1, 384 -; RV64-NEXT: vslidedown.vi v2, v5, 2 -; RV64-NEXT: li a3, 32 -; RV64-NEXT: addi t4, a6, -32 -; RV64-NEXT: sltu a6, a6, t4 -; RV64-NEXT: addi a6, a6, -1 -; RV64-NEXT: and a6, a6, t4 -; RV64-NEXT: addi t4, a6, -16 -; RV64-NEXT: sltu t5, a6, t4 -; RV64-NEXT: addi t5, t5, -1 -; RV64-NEXT: bltu a6, a2, .LBB16_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: li a6, 16 -; RV64-NEXT: .LBB16_6: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v8, (s0) -; RV64-NEXT: csrr s0, vlenb -; RV64-NEXT: sd a0, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: li a0, 56 -; RV64-NEXT: mul s0, s0, a0 -; RV64-NEXT: ld a0, 8(sp) # 8-byte 
Folded Reload -; RV64-NEXT: add s0, sp, s0 -; RV64-NEXT: addi s0, s0, 32 -; RV64-NEXT: vs8r.v v8, (s0) # Unknown-size Folded Spill -; RV64-NEXT: vle64.v v16, (t6) -; RV64-NEXT: vle64.v v8, (t3) -; RV64-NEXT: csrr t3, vlenb -; RV64-NEXT: slli t3, t3, 3 -; RV64-NEXT: add t3, sp, t3 -; RV64-NEXT: addi t3, t3, 32 -; RV64-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill -; RV64-NEXT: vle64.v v8, (a1) -; RV64-NEXT: csrr t3, vlenb -; RV64-NEXT: li t6, 48 -; RV64-NEXT: mul t3, t3, t6 -; RV64-NEXT: add t3, sp, t3 -; RV64-NEXT: addi t3, t3, 32 -; RV64-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill -; RV64-NEXT: vle64.v v8, (t2) -; RV64-NEXT: csrr t2, vlenb -; RV64-NEXT: slli t2, t2, 4 -; RV64-NEXT: add t2, sp, t2 -; RV64-NEXT: addi t2, t2, 32 -; RV64-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill -; RV64-NEXT: and t2, t1, t0 -; RV64-NEXT: and t1, t5, t4 -; RV64-NEXT: addi a1, a1, 256 -; RV64-NEXT: mv t0, a4 -; RV64-NEXT: bltu a4, a3, .LBB16_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: li t0, 32 -; RV64-NEXT: .LBB16_8: -; RV64-NEXT: vsetvli zero, t2, e32, m4, ta, ma -; RV64-NEXT: vnsrl.wi v8, v16, 0, v0.t -; RV64-NEXT: addi t2, sp, 32 -; RV64-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill -; RV64-NEXT: vmv1r.v v0, v3 -; RV64-NEXT: csrr t2, vlenb -; RV64-NEXT: li t3, 56 -; RV64-NEXT: mul t2, t2, t3 -; RV64-NEXT: add t2, sp, t2 -; RV64-NEXT: addi t2, t2, 32 -; RV64-NEXT: vl8r.v v24, (t2) # Unknown-size Folded Reload -; RV64-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV64-NEXT: vmv1r.v v0, v6 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; RV64-NEXT: vsetvli zero, t1, e32, m4, ta, ma -; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t -; 
RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: li t1, 24 -; RV64-NEXT: mul a5, a5, t1 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV64-NEXT: addi a5, t0, -16 -; RV64-NEXT: sltu t0, t0, a5 -; RV64-NEXT: addi t0, t0, -1 -; RV64-NEXT: and a5, t0, a5 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v16, (a1) -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 32 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v6, v7, 2 -; RV64-NEXT: vmv1r.v v0, v4 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li t0, 48 -; RV64-NEXT: mul a1, a1, t0 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 32 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsetvli zero, a6, e32, m4, ta, ma -; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a6, 56 -; RV64-NEXT: mul a1, a1, a6 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 32 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vmv1r.v v0, v2 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 32 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; RV64-NEXT: vnsrl.wi v24, v16, 0, v0.t -; RV64-NEXT: bltu a4, a2, .LBB16_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: li a4, 16 -; RV64-NEXT: .LBB16_10: -; RV64-NEXT: vmv1r.v v0, v5 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 32 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsetvli zero, a4, e32, m4, ta, ma -; RV64-NEXT: vnsrl.wi v16, v8, 0, v0.t -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a4, 48 -; RV64-NEXT: mul a1, a1, a4 -; RV64-NEXT: add a1, sp, a1 -; 
RV64-NEXT: addi a1, a1, 32 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: mv a1, a7 -; RV64-NEXT: bltu a7, a3, .LBB16_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: li a1, 32 -; RV64-NEXT: .LBB16_12: -; RV64-NEXT: vmv1r.v v0, v6 -; RV64-NEXT: addi a4, sp, 32 -; RV64-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: li a5, 24 -; RV64-NEXT: mul a4, a4, a5 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 32 -; RV64-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: li a5, 24 -; RV64-NEXT: mul a4, a4, a5 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 32 -; RV64-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 6 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 32 -; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV64-NEXT: vslideup.vi v24, v8, 16 -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 6 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 32 -; RV64-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV64-NEXT: addi a4, a1, -16 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: li a6, 56 -; RV64-NEXT: mul a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload -; RV64-NEXT: vslideup.vi v8, v16, 16 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: li a6, 56 -; RV64-NEXT: mul a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: li a6, 48 -; RV64-NEXT: mul a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: li a6, 24 -; RV64-NEXT: mul a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; 
RV64-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV64-NEXT: vslideup.vi v8, v16, 16 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: li a6, 48 -; RV64-NEXT: mul a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; RV64-NEXT: sltu a1, a1, a4 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a4 -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 5 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 32 -; RV64-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vnsrl.wi v8, v16, 0, v0.t -; RV64-NEXT: bltu a7, a2, .LBB16_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: li a7, 16 -; RV64-NEXT: .LBB16_14: -; RV64-NEXT: vmv1r.v v0, v7 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 32 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsetvli zero, a7, e32, m4, ta, ma -; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t -; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV64-NEXT: vslideup.vi v16, v8, 16 -; RV64-NEXT: vse32.v v16, (a0) -; RV64-NEXT: addi a1, a0, 256 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 48 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 32 -; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vse32.v v8, (a1) -; RV64-NEXT: addi a1, a0, 128 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 56 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 32 -; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vse32.v v8, (a1) -; RV64-NEXT: addi a0, a0, 384 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 6 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 32 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: 
li a1, 72 -; RV64-NEXT: mul a0, a0, a1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: .cfi_def_cfa sp, 48 -; RV64-NEXT: ld s0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: .cfi_restore s0 -; RV64-NEXT: addi sp, sp, 48 -; RV64-NEXT: .cfi_def_cfa_offset 0 -; RV64-NEXT: ret +; CHECK-LABEL: vtrunc_v128i32_v128i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 72 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 72 * vlenb +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 5 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vslidedown.vi v6, v0, 8 +; CHECK-NEXT: addi a2, a1, 512 +; CHECK-NEXT: addi a3, a1, 640 +; CHECK-NEXT: addi a4, a7, -64 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v27, v6, 4 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a3) +; CHECK-NEXT: sltu a3, a7, a4 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v27, 2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a4, a3, a4 +; CHECK-NEXT: addi a3, a4, -32 +; CHECK-NEXT: sltu a5, a4, a3 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a3, a5, a3 +; CHECK-NEXT: addi a5, a3, -16 +; CHECK-NEXT: sltu a6, a3, a5 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: and a5, a6, a5 +; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: slli a5, 
a5, 4 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a2) +; CHECK-NEXT: addi a5, a1, 128 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v26, v7, 4 +; CHECK-NEXT: bltu a3, a2, .LBB16_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: .LBB16_2: +; CHECK-NEXT: vmv1r.v v0, v27 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v16, (a5) +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 56 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v27, v26, 2 +; CHECK-NEXT: li a5, 64 +; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 6 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: mv a6, a7 +; CHECK-NEXT: bltu a7, a5, .LBB16_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: li a6, 64 +; CHECK-NEXT: .LBB16_4: +; CHECK-NEXT: vmv1r.v v0, v27 +; CHECK-NEXT: addi a5, a1, 384 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: csrr t0, vlenb +; CHECK-NEXT: li t1, 48 +; CHECK-NEXT: mul t0, t0, t1 +; CHECK-NEXT: add t0, sp, t0 +; CHECK-NEXT: addi t0, t0, 16 +; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill +; CHECK-NEXT: addi t0, a6, -32 +; CHECK-NEXT: sltu a6, a6, t0 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: and a6, a6, t0 +; CHECK-NEXT: addi t0, a6, -16 +; CHECK-NEXT: sltu t1, a6, t0 +; CHECK-NEXT: addi t1, t1, -1 +; CHECK-NEXT: and t0, t1, t0 +; CHECK-NEXT: csrr t1, vlenb +; CHECK-NEXT: li t2, 56 +; 
CHECK-NEXT: mul t1, t1, t2 +; CHECK-NEXT: add t1, sp, t1 +; CHECK-NEXT: addi t1, t1, 16 +; CHECK-NEXT: vl8r.v v16, (t1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, t0, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t +; CHECK-NEXT: csrr t0, vlenb +; CHECK-NEXT: slli t0, t0, 3 +; CHECK-NEXT: add t0, sp, t0 +; CHECK-NEXT: addi t0, t0, 16 +; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a6, a2, .LBB16_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: li a6, 16 +; CHECK-NEXT: .LBB16_6: +; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a5) +; CHECK-NEXT: addi a5, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a1, 256 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v26, v6, 2 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li t0, 48 +; CHECK-NEXT: mul a5, a5, t0 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 56 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: mv a5, a4 +; CHECK-NEXT: bltu a4, a3, .LBB16_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: li a5, 32 +; CHECK-NEXT: .LBB16_8: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: addi a1, a5, -16 +; CHECK-NEXT: sltu a5, a5, a1 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a1, a5, a1 +; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: addi a5, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a5, 40 +; CHECK-NEXT: mul a1, a1, a5 +; 
CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a4, a2, .LBB16_10 +; CHECK-NEXT: # %bb.9: +; CHECK-NEXT: li a4, 16 +; CHECK-NEXT: .LBB16_10: +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v25, v7, 2 +; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 48 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: mv a1, a7 +; CHECK-NEXT: bltu a7, a3, .LBB16_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: .LBB16_12: +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 4 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; CHECK-NEXT: vmv4r.v v24, v16 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 3 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: li a5, 40 +; CHECK-NEXT: mul a4, a4, a5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: li a5, 40 +; CHECK-NEXT: mul a4, a4, a5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 6 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vslideup.vi v16, v24, 16 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 6 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: 
addi a4, a4, 16 +; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: addi a4, a1, -16 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 56 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v16, v8, 16 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 56 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 48 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 40 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v8, v16, 16 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 48 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: sltu a1, a1, a4 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a4 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: li a5, 24 +; CHECK-NEXT: mul a4, a4, a5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t +; CHECK-NEXT: bltu a7, a2, .LBB16_14 +; CHECK-NEXT: # %bb.13: +; CHECK-NEXT: li a7, 16 +; CHECK-NEXT: .LBB16_14: +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, ma +; CHECK-NEXT: 
vnsrl.wi v24, v16, 0, v0.t +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vslideup.vi v24, v8, 16 +; CHECK-NEXT: vse32.v v24, (a0) +; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 48 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 56 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: addi a0, a0, 384 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 6 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 72 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa sp, 16 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: ret %v = call <128 x i32> @llvm.vp.trunc.v128i32.v128i64(<128 x i64> %a, <128 x i1> %m, i32 %vl) ret <128 x i32> %v } @@ -905,3 +576,6 @@ define <2 x i7> @vtrunc_v2i7_v2i8(<2 x i8> %a, <2 x i1> %m, i32 zeroext %vl) { %v = call <2 x i7> @llvm.vp.trunc.v2i7.v2i8(<2 x i8> %a, <2 x i1> %m, i32 %vl) ret <2 x i7> %v } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll index 8e2e8f3fb0dec..6c9989775f790 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -402,29 +402,29 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: addi a5, a3, -16 -; CHECK-NEXT: addi a4, a1, 128 -; CHECK-NEXT: addi a7, a2, -32 -; CHECK-NEXT: sltu a3, a3, a5 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a6, a3, a5 -; CHECK-NEXT: sltu a3, a2, a7 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a5, a3, a7 -; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: addi a4, a3, -16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-NEXT: bltu a5, a3, .LBB32_4 +; CHECK-NEXT: sltu a3, a3, a4 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a3, a3, a4 +; CHECK-NEXT: addi a4, a1, 128 +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v16, (a4), v0.t +; CHECK-NEXT: addi a3, a2, -32 +; CHECK-NEXT: sltu a4, a2, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a4, a4, a3 +; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: bltu a4, a3, .LBB32_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a5, 16 +; CHECK-NEXT: li a4, 16 ; CHECK-NEXT: .LBB32_4: -; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a4), v0.t ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-NEXT: addi a4, a1, 256 -; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a4), v0.t +; CHECK-NEXT: addi a5, a1, 256 +; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a5), v0.t ; CHECK-NEXT: bltu a2, a3, .LBB32_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: li a2, 16 diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll index 05254e60b65b7..6c012a6750583 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -206,40 +206,31 @@ define <256 x i8> @select_evl_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v16, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a1, 128 ; CHECK-NEXT: vle8.v v24, (a0) -; CHECK-NEXT: vle8.v v16, (a1) +; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; 
CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v24, v24, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -249,8 +240,7 @@ define <256 x i8> @select_evl_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll index c00723cf60e57..557882ee31d4c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll @@ -94,10 +94,9 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a4 ; RV32-NEXT: vslide1down.vx v10, v10, a2 ; RV32-NEXT: vslidedown.vi v10, v10, 2 -; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vmerge.vxm v8, v8, a0, v0 ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: ret @@ -125,10 +124,9 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a4 ; RV64-NEXT: vslide1down.vx v10, v10, a2 ; RV64-NEXT: vslidedown.vi v10, v10, 2 -; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 ; RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: ret @@ 
-165,10 +163,9 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslidedown.vi v10, v10, 2 -; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vmerge.vim v8, v8, -1, v0 ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: ret @@ -196,10 +193,9 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a3 ; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslidedown.vi v10, v10, 2 -; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vmerge.vim v8, v8, -1, v0 ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: ret @@ -303,10 +299,9 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslidedown.vi v10, v10, 2 -; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: ret @@ -334,10 +329,9 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a3 ; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslidedown.vi v10, v10, 2 -; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; RV64-NEXT: vse32.v v8, (a2) ; 
RV64-NEXT: ret @@ -374,10 +368,9 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslidedown.vi v10, v10, 2 -; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vmerge.vim v8, v8, 0, v0 ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: ret @@ -405,10 +398,9 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a3 ; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslidedown.vi v10, v10, 2 -; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vmerge.vim v8, v8, 0, v0 ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll index 9b5bde2814fda..f34efa477406a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -1498,55 +1498,66 @@ define @vp_floor_nxv16f64( %va, @vfmax_nxv32bf16_vv( %a, @vfmax_nxv32bf16_vv( %a, @vfmax_nxv32bf16_vv( %a, @vfmax_nxv32bf16_vv( %a, @vfmax_nxv32bf16_vv( %a, @vfmax_nxv32bf16_vv( %a, @vfmax_nxv32f16_vv( %a, @vfmax_nxv32f16_vv( %a, @vfmax_nxv32f16_vv( %a, @vfmax_vv_nxv32bf16( %va, @vfmax_vv_nxv32bf16_unmasked( @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 ; ZVFHMIN-NEXT: 
csrr a0, vlenb ; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24 -; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0 +; ZVFHMIN-NEXT: vmfeq.vv v7, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 +; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll index 8cae0bbc03c8e..a7bec94bdf33b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll @@ -165,6 +165,8 @@ define @vfmin_nxv32bf16_vv( %a, @vfmin_nxv32bf16_vv( %a, @vfmin_nxv32bf16_vv( %a, @vfmin_nxv32bf16_vv( %a, @vfmin_nxv32bf16_vv( %a, @vfmin_nxv32bf16_vv( %a, @vfmin_nxv32f16_vv( %a, @vfmin_nxv32f16_vv( %a, @vfmin_nxv32f16_vv( %a, @vfmin_vv_nxv32bf16( %va, @vfmin_vv_nxv32bf16_unmasked( @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 ; ZVFHMIN-NEXT: csrr 
a0, vlenb ; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24 -; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0 +; ZVFHMIN-NEXT: vmfeq.vv v7, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 +; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll index cb7961cb9bd8a..32cdcdca224c5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll @@ -984,20 +984,20 @@ define @fshr_v16i64( %a, @fshr_v16i64( %a, @fshr_v16i64( %a, @fshr_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @vp_round_nxv16f64( %va, @vp_roundeven_nxv16f64( %va, ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; 
CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI44_0) ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: vslidedown.vx v25, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfabs.v v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 0 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; 
CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll index 75615fe0fe759..f98d9a9ae940a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -1498,55 +1498,66 @@ define @vp_roundtozero_nxv16f64( %v ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: lui a2, 
%hi(.LCPI44_0) ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: vslidedown.vx v25, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfabs.v v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 1 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v 
v0, v24 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll index 92d2562168ac7..e6272701a6033 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll @@ -1358,17 +1358,19 @@ define @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, %data, <16 x i8> %mask, i8 %passthru) { ; CHECK-LABEL: extract_last_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vmsne.vi v0, v9, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vcpop.m a1, v0 +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: beqz a1, .LBB0_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vsetvli zero, zero, 
e8, m1, ta, mu -; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -30,12 +29,12 @@ define i16 @extract_last_i16(<8 x i16> %data, <8 x i16> %mask, i16 %passthru) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: beqz a1, .LBB1_2 -; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu ; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vcpop.m a1, v0 ; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: beqz a1, .LBB1_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -54,12 +53,12 @@ define i32 @extract_last_i32(<4 x i32> %data, <4 x i32> %mask, i32 %passthru) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: beqz a1, .LBB2_2 -; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu ; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vcpop.m a1, v0 ; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: beqz a1, .LBB2_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -78,14 +77,14 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vmsne.vi v0, v9, 0 -; RV32-NEXT: vcpop.m a2, v0 -; RV32-NEXT: beqz a2, .LBB3_2 -; RV32-NEXT: # %bb.1: ; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: vcpop.m a2, v0 ; RV32-NEXT: vid.v v9, v0.t +; RV32-NEXT: beqz a2, .LBB3_2 +; RV32-NEXT: # %bb.1: ; RV32-NEXT: vredmaxu.vs v9, v9, v9 +; RV32-NEXT: li a1, 32 ; RV32-NEXT: vmv.x.s a0, v9 ; RV32-NEXT: andi a0, a0, 255 ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma @@ -101,12 +100,12 @@ define i64 
@extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vmsne.vi v0, v9, 0 -; RV64-NEXT: vcpop.m a1, v0 -; RV64-NEXT: beqz a1, .LBB3_2 -; RV64-NEXT: # %bb.1: ; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; RV64-NEXT: vmv.v.i v9, 0 +; RV64-NEXT: vcpop.m a1, v0 ; RV64-NEXT: vid.v v9, v0.t +; RV64-NEXT: beqz a1, .LBB3_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: vredmaxu.vs v9, v9, v9 ; RV64-NEXT: vmv.x.s a0, v9 ; RV64-NEXT: andi a0, a0, 255 @@ -125,12 +124,12 @@ define float @extract_last_float(<4 x float> %data, <4 x i32> %mask, float %pass ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: beqz a0, .LBB4_2 -; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu ; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: beqz a0, .LBB4_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -149,12 +148,12 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: beqz a0, .LBB5_2 -; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: beqz a0, .LBB5_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -171,13 +170,12 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double % define i8 @extract_last_i8_scalable( %data, %mask, i8 %passthru) { ; CHECK-LABEL: extract_last_i8_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, 
m2, ta, mu +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vcpop.m a1, v0 +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a1, .LBB6_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, mu -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 @@ -193,13 +191,12 @@ define i8 @extract_last_i8_scalable( %data, define i16 @extract_last_i16_scalable( %data, %mask, i16 %passthru) { ; CHECK-LABEL: extract_last_i16_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vcpop.m a1, v0 +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a1, .LBB7_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 @@ -215,13 +212,12 @@ define i16 @extract_last_i16_scalable( %data, %data, %mask, i32 %passthru) { ; CHECK-LABEL: extract_last_i32_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vcpop.m a1, v0 +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a1, .LBB8_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 @@ -237,15 +233,14 @@ define i32 @extract_last_i32_scalable( %data, %data, %mask, i64 %passthru) { ; RV32-LABEL: extract_last_i64_scalable: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, mu +; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: vcpop.m a2, v0 +; RV32-NEXT: vid.v v10, v0.t ; RV32-NEXT: beqz a2, .LBB9_2 
; RV32-NEXT: # %bb.1: -; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, mu -; RV32-NEXT: vid.v v10, v0.t ; RV32-NEXT: vredmaxu.vs v10, v10, v10 +; RV32-NEXT: li a1, 32 ; RV32-NEXT: vmv.x.s a0, v10 ; RV32-NEXT: andi a0, a0, 255 ; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma @@ -259,13 +254,12 @@ define i64 @extract_last_i64_scalable( %data, %data, %data, %mask, float %passthru) { ; CHECK-LABEL: extract_last_float_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a0, .LBB10_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 @@ -303,13 +296,12 @@ define float @extract_last_float_scalable( %data, %data, %mask, double %passthru) { ; CHECK-LABEL: extract_last_double_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a0, .LBB11_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll b/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll index fd1dbab2362a7..6435c1c14e061 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll @@ -222,9 +222,9 @@ define @vadd_vv_mask_negative( %0, @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll index 90d798b167cfc..f590191a92cdd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll @@ -299,15 +299,15 @@ define @splice_nxv64i1_offset_negone( %a, < ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vmerge.vim v16, v24, 1, v0 +; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v24, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vslideup.vi v8, v16, 1 +; CHECK-NEXT: vslideup.vi v8, v24, 1 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll index 6831d1fb63cae..952758ccc24e7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -436,19 +436,15 @@ define @vfadd_vv_nxv32bf16( %va, @vfadd_vv_nxv32f16( %va, @vfdiv_vv_nxv32bf16( %va, @vfdiv_vv_nxv32f16( %va, @vfma_vv_nxv32bf16( %va, @vfma_vv_nxv32bf16( %va, @vfma_vv_nxv32bf16( %va, @vfma_vv_nxv32bf16( %va, @vfma_vf_nxv32bf16( %va, bfl ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: vmv8r.v v24, v8 +; CHECK-NEXT: vmv8r.v v24, v16 ; CHECK-NEXT: fmv.x.h a2, fa0 ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: slli a1, a3, 1 @@ -871,12 +872,12 @@ define @vfma_vf_nxv32bf16( %va, bfl ; CHECK-NEXT: addi a4, a4, 16 ; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma 
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28, v0.t ; CHECK-NEXT: csrr a4, vlenb ; CHECK-NEXT: slli a4, a4, 3 ; CHECK-NEXT: add a4, sp, a4 ; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a4, vlenb ; CHECK-NEXT: slli a4, a4, 3 ; CHECK-NEXT: mv a5, a4 @@ -884,33 +885,33 @@ define @vfma_vf_nxv32bf16( %va, bfl ; CHECK-NEXT: add a4, a4, a5 ; CHECK-NEXT: add a4, sp, a4 ; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28, v0.t +; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t ; CHECK-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv.v.x v24, a2 +; CHECK-NEXT: vmv.v.x v8, a2 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 5 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 5 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v24, v16, v8, v0.t +; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t +; CHECK-NEXT: vfncvtbf16.f.f.w 
v20, v8, v0.t ; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 @@ -935,36 +936,35 @@ define @vfma_vf_nxv32bf16( %va, bfl ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24, v0.t +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vfmadd.vv v24, v16, v8, v0.t ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t ; CHECK-NEXT: 
csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: mv a1, a0 @@ -995,14 +995,16 @@ define @vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vmv8r.v v24, v16 +; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: vmv.v.x v24, a1 +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: sub a3, a0, a1 @@ -1015,21 +1017,21 @@ define @vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: slli a3, a3, 4 ; CHECK-NEXT: add a3, sp, a3 ; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28, v0.t ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 5 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28, v0.t -; CHECK-NEXT: vmv4r.v v24, v8 +; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: mv a3, a2 @@ -1037,19 +1039,19 @@ define 
@vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: add a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v24, v16, v8, v0.t +; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t +; CHECK-NEXT: vfncvtbf16.f.f.w v20, v8, v0.t ; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 @@ -1059,9 +1061,9 @@ define @vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 @@ -1074,14 +1076,14 @@ define @vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16, v0.t +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 5 ; 
CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: mv a1, a0 @@ -1089,12 +1091,12 @@ define @vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: mv a1, a0 @@ -1102,14 +1104,13 @@ define @vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t -; CHECK-NEXT: vmv.v.v v16, v8 +; CHECK-NEXT: vfmadd.vv v24, v16, v8, v0.t ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: mv a1, a0 @@ -2145,7 +2146,7 @@ define @vfma_vv_nxv32f16( %va, @vfma_vv_nxv32f16( %va, @vfma_vv_nxv32f16( %va, @vfma_vv_nxv32f16( %va, @vfma_vf_nxv32f16( %va, half %b, ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; 
ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v7, v0 -; ZVFHMIN-NEXT: vmv8r.v v24, v8 +; ZVFHMIN-NEXT: vmv8r.v v24, v16 ; ZVFHMIN-NEXT: fmv.x.h a2, fa0 ; ZVFHMIN-NEXT: csrr a3, vlenb ; ZVFHMIN-NEXT: slli a1, a3, 1 @@ -2409,12 +2411,12 @@ define @vfma_vf_nxv32f16( %va, half %b, ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 ; ZVFHMIN-NEXT: mv a5, a4 @@ -2422,33 +2424,33 @@ define @vfma_vf_nxv32f16( %va, half %b, ; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v24, a2 +; ZVFHMIN-NEXT: vmv.v.x v8, a2 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; 
ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB68_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 @@ -2473,36 +2475,35 @@ define @vfma_vf_nxv32f16( %va, half %b, ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: 
csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t -; ZVFHMIN-NEXT: vmv.v.v v16, v8 +; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t ; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -2539,14 +2540,16 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v7, v0 +; ZVFHMIN-NEXT: vmv8r.v v24, v16 +; ZVFHMIN-NEXT: vmv8r.v v16, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: vmv.v.x v24, a1 +; ZVFHMIN-NEXT: vmv.v.x v8, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -2559,21 +2562,21 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: slli a3, a3, 4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, 
a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t -; ZVFHMIN-NEXT: vmv4r.v v24, v8 +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vmv4r.v v8, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 @@ -2581,19 +2584,19 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB69_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 @@ -2603,9 +2606,9 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # 
Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 @@ -2618,14 +2621,14 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -2633,12 +2636,12 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -2646,14 +2649,13 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # 
Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t -; ZVFHMIN-NEXT: vmv.v.v v16, v8 +; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t ; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -8423,7 +8425,7 @@ define @vfmsub_vv_nxv32f16( %va, @vfmsub_vv_nxv32f16( %va, @vfmsub_vf_nxv32f16( %va, half % ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v7, v0 -; ZVFHMIN-NEXT: vmv8r.v v24, v8 ; ZVFHMIN-NEXT: fmv.x.h a2, fa0 ; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: vxor.vx v8, v16, a1, v0.t +; ZVFHMIN-NEXT: vxor.vx v24, v16, a1, v0.t ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 ; ZVFHMIN-NEXT: sub a4, a0, a1 @@ -8632,9 +8633,9 @@ define @vfmsub_vf_nxv32f16( %va, half % ; ZVFHMIN-NEXT: slli a4, a4, 4 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 ; ZVFHMIN-NEXT: add a4, sp, a4 @@ -8647,33 +8648,33 @@ define @vfmsub_vf_nxv32f16( %va, half % ; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t +; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill 
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v24, a2 +; ZVFHMIN-NEXT: vmv.v.x v8, a2 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB282_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 @@ -8698,36 +8699,35 @@ define @vfmsub_vf_nxv32f16( %va, half % ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size 
Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t -; ZVFHMIN-NEXT: vmv.v.v v16, v8 +; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t ; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -9395,24 +9395,24 @@ define @vfnmadd_vv_nxv32f16_commuted( % ; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 
-; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t ; ZVFHMIN-NEXT: vmv.v.v v8, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t @@ -9459,28 +9459,27 @@ define @vfnmadd_vv_nxv32f16_commuted( % ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t -; ZVFHMIN-NEXT: vmv.v.v v16, v8 +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, 
v16, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: mv a1, a0 @@ -9764,8 +9763,8 @@ define @vfnmadd_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v24, v8, a2, v0.t -; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v8, v8, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v24, v16, a2, v0.t ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 ; ZVFHMIN-NEXT: sub a2, a0, a1 @@ -9778,14 +9777,14 @@ define @vfnmadd_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: slli a3, a3, 4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 @@ -9793,25 +9792,25 @@ define @vfnmadd_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vl8r.v v16, 
(a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB290_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 @@ -9847,25 +9846,24 @@ define @vfnmadd_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t -; ZVFHMIN-NEXT: vmv.v.v v16, v8 +; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t ; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; 
ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -9914,8 +9912,8 @@ define @vfnmadd_vf_nxv32f16_commute( %v ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v24, v8, a2, v0.t -; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v8, v8, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v24, v16, a2, v0.t ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 ; ZVFHMIN-NEXT: sub a2, a0, a1 @@ -9928,14 +9926,14 @@ define @vfnmadd_vf_nxv32f16_commute( %v ; ZVFHMIN-NEXT: slli a3, a3, 4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 @@ -9943,23 +9941,23 @@ define @vfnmadd_vf_nxv32f16_commute( %v ; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, 
v0.t +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB291_2 @@ -10378,14 +10376,14 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -10393,12 +10391,12 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; 
ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -10406,9 +10404,9 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t ; ZVFHMIN-NEXT: vmv.v.v v16, v8 ; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -10452,14 +10450,7 @@ define @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmsub_vv_nxv32f16_commuted( % ; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; 
ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t ; ZVFHMIN-NEXT: vmv.v.v v8, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t @@ -11165,28 +11148,27 @@ define @vfnmsub_vv_nxv32f16_commuted( % ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t -; ZVFHMIN-NEXT: vmv.v.v v16, v8 +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: mv a1, a0 @@ -11464,7 +11446,7 @@ define @vfnmsub_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: fmv.x.h a2, fa0 ; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: vxor.vx v16, v8, a1, v0.t +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, 
v0.t ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 ; ZVFHMIN-NEXT: sub a4, a0, a1 @@ -11477,14 +11459,15 @@ define @vfnmsub_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: slli a4, a4, 4 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv8r.v v8, v24 ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 ; ZVFHMIN-NEXT: mv a5, a4 @@ -11493,32 +11476,32 @@ define @vfnmsub_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v24, a2 +; ZVFHMIN-NEXT: vmv.v.x v8, a2 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: 
add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB302_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 @@ -11543,36 +11526,35 @@ define @vfnmsub_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, 
a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t -; ZVFHMIN-NEXT: vmv.v.v v16, v8 +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t ; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -11629,6 +11611,7 @@ define @vfnmsub_vf_nxv32f16_commute( %v ; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vmv4r.v v8, v16 ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 ; ZVFHMIN-NEXT: mv a5, a4 @@ -11636,7 +11619,7 @@ define @vfnmsub_vf_nxv32f16_commute( %v ; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 @@ -11644,26 +11627,26 @@ define @vfnmsub_vf_nxv32f16_commute( %v ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v16, a2 +; ZVFHMIN-NEXT: vmv.v.x v8, a2 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) 
# Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb @@ -12154,14 +12137,14 @@ define @vfnmsub_vf_nxv32f16_neg_splat_commute( @vfnmsub_vf_nxv32f16_neg_splat_commute( @vfnmsub_vf_nxv32f16_neg_splat_commute( @vfnmsub_vf_nxv32f16_neg_splat_commute( @vfnmsub_vf_nxv32f16_neg_splat_commute( @vfmax_vv_nxv32bf16( %va, @vfmax_vv_nxv32f16( %va, @vfmin_vv_nxv32bf16( %va, @vfmin_vv_nxv32f16( %va, @vfmul_vv_nxv32f16( %va, @llvm.vp.fptosi.nxv32i16.nxv32f32( @vfptosi_nxv32i16_nxv32f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptosi_nxv32i16_nxv32f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: slli a1, a1, 1 @@ -500,16 +508,24 @@ define @vfptosi_nxv32i16_nxv32f32( %va, ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 
+; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v28, v16, v0.t +; CHECK-NEXT: vfncvt.rtz.x.f.w v20, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB34_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB34_2: -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v24, v8, v0.t -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa sp, 16 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = call @llvm.vp.fptosi.nxv32i16.nxv32f32( %va, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll index a11139fea9e5b..70f4e6f4ddfb7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll @@ -490,8 +490,16 @@ declare @llvm.vp.fptoui.nxv32i16.nxv32f32( @vfptoui_nxv32i16_nxv32f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptoui_nxv32i16_nxv32f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: slli a1, a1, 1 @@ -500,16 +508,24 @@ define @vfptoui_nxv32i16_nxv32f32( %va, ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi 
a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v28, v16, v0.t +; CHECK-NEXT: vfncvt.rtz.xu.f.w v20, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB34_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB34_2: -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v24, v8, v0.t -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa sp, 16 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = call @llvm.vp.fptoui.nxv32i16.nxv32f32( %va, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll index 9d5005f9c5ed0..98a6ae59ea67a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll @@ -96,8 +96,16 @@ declare @llvm.vp.fptrunc.nxv16f64.nxv16f32( @vfptrunc_nxv16f32_nxv16f64( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vfptrunc_nxv16f32_nxv16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: sub a3, a0, a1 @@ -105,16 +113,24 @@ define @vfptrunc_nxv16f32_nxv16f64( ; 
CHECK-NEXT: sltu a2, a0, a3 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v28, v16, v0.t +; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB7_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: .cfi_def_cfa sp, 16 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = call @llvm.vp.fptrunc.nxv16f64.nxv16f32( %a, %m, i32 %vl) ret %v @@ -128,54 +144,47 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) 
# Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: srli a5, a1, 3 -; CHECK-NEXT: slli a4, a1, 3 +; CHECK-NEXT: srli a4, a1, 3 +; CHECK-NEXT: srli a5, a1, 2 +; CHECK-NEXT: slli a6, a1, 3 ; CHECK-NEXT: slli a3, a1, 1 -; CHECK-NEXT: add a6, a0, a4 -; CHECK-NEXT: sub a0, a2, a3 -; CHECK-NEXT: sltu a4, a2, a0 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a0, a4, a0 -; CHECK-NEXT: sub a4, a0, a1 -; CHECK-NEXT: sltu a7, a0, a4 +; CHECK-NEXT: vslidedown.vx v16, v0, a5 +; CHECK-NEXT: add a6, a0, a6 +; CHECK-NEXT: sub a5, a2, a3 +; CHECK-NEXT: vl8re64.v v24, (a6) +; CHECK-NEXT: sltu a6, a2, a5 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: and a5, a6, a5 +; CHECK-NEXT: sub a6, a5, a1 +; CHECK-NEXT: sltu a7, a5, a6 ; CHECK-NEXT: addi a7, a7, -1 -; CHECK-NEXT: and a4, a7, a4 -; CHECK-NEXT: srli a7, a1, 2 -; CHECK-NEXT: vl8re64.v v8, (a6) -; CHECK-NEXT: vslidedown.vx v16, v0, a7 -; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v25, v0, a5 -; CHECK-NEXT: vslidedown.vx v0, v16, a5 -; CHECK-NEXT: bltu a0, a1, .LBB8_2 +; CHECK-NEXT: vl8re64.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v16, a4 +; CHECK-NEXT: and a0, a7, a6 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t +; CHECK-NEXT: bltu a5, a1, .LBB8_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: mv a5, a1 ; CHECK-NEXT: .LBB8_2: -; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v20, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: addi a4, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v6, v7, a4 +; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w 
v16, v8, v0.t ; CHECK-NEXT: bltu a2, a3, .LBB8_4 ; CHECK-NEXT: # %bb.3: @@ -185,11 +194,8 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: sltu a3, a2, a0 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a0, a3, a0 -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 3 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: addi a3, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v28, v8, v0.t @@ -197,9 +203,9 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB8_6: -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -207,8 +213,7 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll index 056c7557440e0..da1d5cb117913 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll @@ -398,19 +398,15 @@ define @vfsub_vv_nxv32bf16( %va, @vfsub_vv_nxv32f16( %va, @vpgather_baseidx_nxv32i8(ptr %base, @vpload_nxv17f64(ptr %ptr, ptr %out, @vpmerge_nxv64i1( %va, @llvm.vp.merge.nxv64i1( %m, %va, %vb, i32 %evl) ret %v @@ -217,17 +217,17 @@ define @vpmerge_nxv128i1( %va, @llvm.vp.merge.nxv128i1( %m, %va, %vb, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll index 
b6cf9e1a9cb97..ccea5b05fb03c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll @@ -347,22 +347,21 @@ define double @vpreduce_ord_fadd_nxv4f64(double %s, %v, %val, %m, i32 zeroext %evl) { ; CHECK-LABEL: vreduce_fminimum_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: feq.s a1, fa0, fa0 -; CHECK-NEXT: vcpop.m a2, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v10, v8, v10, v0.t +; CHECK-NEXT: vmfne.vv v11, v8, v8, v0.t +; CHECK-NEXT: vcpop.m a0, v11, v0.t ; CHECK-NEXT: xori a1, a1, 1 -; CHECK-NEXT: or a1, a2, a1 -; CHECK-NEXT: beqz a1, .LBB22_2 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: beqz a0, .LBB22_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB22_2: -; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfredmin.vs v10, v8, v10, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v10 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fminimum.nxv4f32(float %start, %val, %m, i32 %evl) @@ -372,22 +371,21 @@ define float @vreduce_fminimum_nxv4f32(float %start, %val, define float @vreduce_fmaximum_nxv4f32(float %start, %val, %m, i32 zeroext %evl) { ; CHECK-LABEL: vreduce_fmaximum_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: feq.s a1, fa0, fa0 -; CHECK-NEXT: vcpop.m a2, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v10, v8, v10, v0.t +; CHECK-NEXT: vmfne.vv v11, v8, v8, v0.t +; CHECK-NEXT: vcpop.m a0, v11, v0.t ; CHECK-NEXT: xori a1, a1, 1 -; 
CHECK-NEXT: or a1, a2, a1 -; CHECK-NEXT: beqz a1, .LBB23_2 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: beqz a0, .LBB23_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB23_2: -; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfredmax.vs v10, v8, v10, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v10 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fmaximum.nxv4f32(float %start, %val, %m, i32 %evl) @@ -423,22 +421,21 @@ define float @vreduce_fmaximum_nnan_nxv4f32(float %start, % define float @vreduce_fminimum_v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vreduce_fminimum_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmfne.vv v9, v8, v8, v0.t +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: feq.s a1, fa0, fa0 -; CHECK-NEXT: vcpop.m a2, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfredmin.vs v9, v8, v9, v0.t +; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t +; CHECK-NEXT: vcpop.m a0, v8, v0.t ; CHECK-NEXT: xori a1, a1, 1 -; CHECK-NEXT: or a1, a2, a1 -; CHECK-NEXT: beqz a1, .LBB26_2 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: beqz a0, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfredmin.vs v9, v8, v9, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fminimum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl) @@ -448,22 +445,21 @@ define float @vreduce_fminimum_v4f32(float %start, <4 x float> %val, <4 x i1> %m define float @vreduce_fmaximum_v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 
zeroext %evl) { ; CHECK-LABEL: vreduce_fmaximum_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmfne.vv v9, v8, v8, v0.t +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: feq.s a1, fa0, fa0 -; CHECK-NEXT: vcpop.m a2, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfredmax.vs v9, v8, v9, v0.t +; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t +; CHECK-NEXT: vcpop.m a0, v8, v0.t ; CHECK-NEXT: xori a1, a1, 1 -; CHECK-NEXT: or a1, a2, a1 -; CHECK-NEXT: beqz a1, .LBB27_2 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: beqz a0, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfredmax.vs v9, v8, v9, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fmaximum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll index c041a165a594f..7b6dd5399b275 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll @@ -507,8 +507,16 @@ declare @llvm.vp.sitofp.nxv32f16.nxv32i32( @vsitofp_nxv32f16_nxv32i32( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vsitofp_nxv32f16_nxv32i32: ; ZVFH: # %bb.0: +; ZVFH-NEXT: addi sp, sp, -16 +; ZVFH-NEXT: .cfi_def_cfa_offset 16 +; ZVFH-NEXT: csrr a1, vlenb +; ZVFH-NEXT: slli a1, a1, 3 +; ZVFH-NEXT: sub sp, sp, a1 +; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFH-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; ZVFH-NEXT: vmv1r.v v24, v0 +; ZVFH-NEXT: vmv1r.v v7, v0 +; ZVFH-NEXT: addi a1, sp, 16 +; ZVFH-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; 
ZVFH-NEXT: csrr a1, vlenb ; ZVFH-NEXT: srli a2, a1, 2 ; ZVFH-NEXT: slli a1, a1, 1 @@ -517,16 +525,24 @@ define @vsitofp_nxv32f16_nxv32i32( %va, ; ZVFH-NEXT: sltu a3, a0, a2 ; ZVFH-NEXT: addi a3, a3, -1 ; ZVFH-NEXT: and a2, a3, a2 +; ZVFH-NEXT: addi a3, sp, 16 +; ZVFH-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; ZVFH-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFH-NEXT: vfncvt.f.x.w v28, v16, v0.t +; ZVFH-NEXT: vfncvt.f.x.w v20, v24, v0.t ; ZVFH-NEXT: bltu a0, a1, .LBB34_2 ; ZVFH-NEXT: # %bb.1: ; ZVFH-NEXT: mv a0, a1 ; ZVFH-NEXT: .LBB34_2: -; ZVFH-NEXT: vmv1r.v v0, v24 +; ZVFH-NEXT: vmv1r.v v0, v7 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vfncvt.f.x.w v24, v8, v0.t -; ZVFH-NEXT: vmv8r.v v8, v24 +; ZVFH-NEXT: vfncvt.f.x.w v16, v8, v0.t +; ZVFH-NEXT: vmv8r.v v8, v16 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: add sp, sp, a0 +; ZVFH-NEXT: .cfi_def_cfa sp, 16 +; ZVFH-NEXT: addi sp, sp, 16 +; ZVFH-NEXT: .cfi_def_cfa_offset 0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vsitofp_nxv32f16_nxv32i32: diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll index ebf8d5eeb40bc..895d1d8c0ab31 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll @@ -591,22 +591,22 @@ define @vssub_vi_nxv128i8( %va, @llvm.vp.ssub.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll index d54901c93d53c..52ca9ae174fdd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll @@ -589,22 +589,22 @@ define @vssubu_vi_nxv128i8( %va, @llvm.vp.usub.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll index 32d24778d7327..aabafae8a2475 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll @@ 
-285,54 +285,47 @@ define @vtrunc_nxv32i64_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, @llvm.vp.uitofp.nxv32f16.nxv32i32( @vuitofp_nxv32f16_nxv32i32( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vuitofp_nxv32f16_nxv32i32: ; ZVFH: # %bb.0: +; ZVFH-NEXT: addi sp, sp, -16 +; ZVFH-NEXT: .cfi_def_cfa_offset 16 +; ZVFH-NEXT: csrr a1, vlenb +; ZVFH-NEXT: slli a1, a1, 3 +; ZVFH-NEXT: sub sp, sp, a1 +; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFH-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; ZVFH-NEXT: vmv1r.v v24, v0 +; ZVFH-NEXT: vmv1r.v v7, v0 +; ZVFH-NEXT: addi a1, sp, 16 +; ZVFH-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; ZVFH-NEXT: csrr a1, vlenb ; ZVFH-NEXT: srli a2, a1, 2 ; ZVFH-NEXT: slli a1, a1, 1 @@ -509,16 +517,24 @@ define @vuitofp_nxv32f16_nxv32i32( %va, ; ZVFH-NEXT: sltu a3, a0, a2 ; ZVFH-NEXT: addi a3, a3, -1 ; ZVFH-NEXT: and a2, a3, a2 +; ZVFH-NEXT: addi a3, sp, 16 +; ZVFH-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; ZVFH-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFH-NEXT: vfncvt.f.xu.w v28, v16, v0.t +; ZVFH-NEXT: vfncvt.f.xu.w v20, v24, v0.t ; ZVFH-NEXT: bltu a0, a1, .LBB34_2 ; ZVFH-NEXT: # %bb.1: ; ZVFH-NEXT: mv a0, a1 ; ZVFH-NEXT: .LBB34_2: -; ZVFH-NEXT: vmv1r.v v0, v24 +; ZVFH-NEXT: vmv1r.v v0, v7 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vfncvt.f.xu.w v24, v8, v0.t -; ZVFH-NEXT: vmv8r.v v8, v24 +; ZVFH-NEXT: vfncvt.f.xu.w v16, v8, v0.t +; ZVFH-NEXT: vmv8r.v v8, v16 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: add sp, sp, a0 +; ZVFH-NEXT: .cfi_def_cfa sp, 16 +; ZVFH-NEXT: addi sp, sp, 16 +; ZVFH-NEXT: .cfi_def_cfa_offset 0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vuitofp_nxv32f16_nxv32i32: From b91c582da130c14e2be905314829c808e616a4ca Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 31 Jan 2025 16:54:01 +0800 Subject: [PATCH 3/4] Address 
review comments - Peek through copies to match isel better, remove most of test diff - Use liveins list to check for clobbers across blocks - Move messages into asserts - Add test for inline asm use - Assert no subregster index --- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 2 +- .../lib/Target/RISCV/RISCVVMV0Elimination.cpp | 51 +- llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll | 44 +- llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll | 47 +- llvm/test/CodeGen/RISCV/rvv/copyprop.mir | 4 +- llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll | 82 +- llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll | 79 +- llvm/test/CodeGen/RISCV/rvv/expandload.ll | 42 +- .../RISCV/rvv/fixed-vectors-bswap-vp.ll | 32 +- .../RISCV/rvv/fixed-vectors-ctlz-vp.ll | 150 ++-- .../RISCV/rvv/fixed-vectors-ctpop-vp.ll | 105 ++- .../RISCV/rvv/fixed-vectors-cttz-vp.ll | 316 +++++--- .../RISCV/rvv/fixed-vectors-fmaximum-vp.ll | 26 +- .../RISCV/rvv/fixed-vectors-fminimum-vp.ll | 26 +- .../rvv/fixed-vectors-interleaved-access.ll | 211 ++--- .../RISCV/rvv/fixed-vectors-rint-vp.ll | 47 +- .../RISCV/rvv/fixed-vectors-trunc-vp.ll | 7 +- .../RISCV/rvv/fixed-vectors-vselect-vp.ll | 30 +- llvm/test/CodeGen/RISCV/rvv/floor-vp.ll | 47 +- .../test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll | 33 +- llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll | 66 +- .../test/CodeGen/RISCV/rvv/fminimum-sdnode.ll | 33 +- llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll | 66 +- llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll | 131 +-- llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll | 47 +- llvm/test/CodeGen/RISCV/rvv/round-vp.ll | 47 +- llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll | 47 +- llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll | 47 +- llvm/test/CodeGen/RISCV/rvv/vector-splice.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll | 24 +- llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll | 24 +- llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll | 766 +++++++++--------- llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll | 24 +- llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll | 24 +- 
llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll | 26 +- llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll | 26 +- llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll | 14 +- llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll | 24 +- .../CodeGen/RISCV/rvv/vmv0-elimination.ll | 29 + llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 20 +- llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll | 26 +- llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll | 20 +- llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll | 20 +- llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll | 14 +- llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll | 26 +- 46 files changed, 1571 insertions(+), 1421 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmv0-elimination.ll diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 772b8541a8bf0..167dbb53c5950 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -605,7 +605,7 @@ void RISCVPassConfig::addPreRegAlloc() { // TODO: Move this as late as possible before regalloc if (TM->getOptLevel() == CodeGenOptLevel::None) addPass(createRISCVVMV0EliminationPass()); - + addPass(createRISCVPreRAExpandPseudoPass()); if (TM->getOptLevel() != CodeGenOptLevel::None) { addPass(createRISCVMergeBaseOffsetOptPass()); diff --git a/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp b/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp index 24a75c1a1592f..f793b3a3b42f9 100644 --- a/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp +++ b/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp @@ -72,6 +72,10 @@ FunctionPass *llvm::createRISCVVMV0EliminationPass() { return new RISCVVMV0Elimination(); } +static bool isVMV0(const MCOperandInfo &MCOI) { + return MCOI.RegClass == RISCV::VMV0RegClassID; +} + bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -85,29 +89,26 @@ bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction 
&MF) { const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); const TargetInstrInfo *TII = ST->getInstrInfo(); - auto IsVMV0 = [](const MCOperandInfo &MCOI) { - return MCOI.RegClass == RISCV::VMV0RegClassID; - }; - #ifndef NDEBUG - // Assert that we won't clobber any existing reads of V0 where we need to + // Assert that we won't clobber any existing reads of v0 where we need to // insert copies. ReversePostOrderTraversal RPOT(&*MF.begin()); - SmallPtrSet V0ClobberedOnEntry; for (MachineBasicBlock *MBB : RPOT) { - bool V0Clobbered = V0ClobberedOnEntry.contains(MBB); + bool V0Clobbered = false; for (MachineInstr &MI : *MBB) { - assert(!(MI.readsRegister(RISCV::V0, TRI) && V0Clobbered)); + assert(!(MI.readsRegister(RISCV::V0, TRI) && V0Clobbered) && + "Inserting a copy to v0 would clobber a read"); if (MI.modifiesRegister(RISCV::V0, TRI)) V0Clobbered = false; - if (any_of(MI.getDesc().operands(), IsVMV0)) + if (any_of(MI.getDesc().operands(), isVMV0)) V0Clobbered = true; } - if (V0Clobbered) - for (MachineBasicBlock *Succ : MBB->successors()) - V0ClobberedOnEntry.insert(Succ); + assert(!(V0Clobbered && + any_of(MBB->successors(), + [](auto *Succ) { return Succ->isLiveIn(RISCV::V0); })) && + "Clobbered a v0 used in a successor"); } #endif @@ -116,14 +117,26 @@ bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction &MF) { // For any instruction with a vmv0 operand, replace it with a copy to v0. for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { - // An instruction should only have one or zero vmv0 operands. 
- assert(count_if(MI.getDesc().operands(), IsVMV0) < 2); + assert(count_if(MI.getDesc().operands(), isVMV0) < 2 && + "Expected only one or zero vmv0 operands"); for (auto [OpNo, MCOI] : enumerate(MI.getDesc().operands())) { - if (IsVMV0(MCOI)) { + if (isVMV0(MCOI)) { MachineOperand &MO = MI.getOperand(OpNo); + Register Src = MO.getReg(); + assert(MO.isUse() && MO.getSubReg() == RISCV::NoSubRegister && + Src.isVirtual() && "vmv0 use in unexpected form"); + + // Peek through a single copy to match what isel does. + MachineInstr *SrcMI = MRI.getVRegDef(Src); + if (SrcMI->isCopy() && SrcMI->getOperand(1).getReg().isVirtual()) { + assert(SrcMI->getOperand(1).getSubReg() == RISCV::NoSubRegister); + Src = SrcMI->getOperand(1).getReg(); + } + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::COPY), RISCV::V0) - .addReg(MO.getReg()); + .addReg(Src); + MO.setReg(RISCV::V0); MadeChange = true; break; @@ -132,6 +145,9 @@ bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction &MF) { } } + if (!MadeChange) + return false; + // Now that any constraints requiring vmv0 are gone, eliminate any uses of // vmv0 by recomputing the reg class. // The only remaining uses should be around inline asm. 
@@ -143,7 +159,8 @@ bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction &MF) { MRI.recomputeRegClass(MO.getReg()); assert(MRI.getRegClass(MO.getReg()) != &RISCV::VMV0RegClass || MI.isInlineAsm() || - MRI.getVRegDef(MO.getReg())->isInlineAsm()); + MRI.getVRegDef(MO.getReg())->isInlineAsm() && + "Non-inline-asm use of vmv0 left behind"); MadeChange = true; } } diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll index 68f3de89f1f5a..4d34621cd5f24 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -3065,18 +3065,18 @@ define @vp_bitreverse_nxv64i16( %va, @vp_bitreverse_nxv64i16( %va, @vp_bitreverse_nxv64i16( %va, @vp_ceil_vv_nxv16f64( %va, < ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI44_0) ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v25, v0, a3 +; CHECK-NEXT: vslidedown.vx v6, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, 
ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 3 -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 
4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir index a9da6c305aac3..0b905b57f92b8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir +++ b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir @@ -45,8 +45,8 @@ body: | %3:vr = COPY $v8 %17:vr = PseudoVSLL_VI_M1 undef $noreg, %3, 5, 1, 6 /* e64 */, 0 %22:vr = PseudoVMSNE_VI_M1 %3, 0, 1, 6 /* e64 */ - $v0 = COPY %22 - %25:vrnov0 = PseudoVMERGE_VIM_M1 undef $noreg, %17, -1, $v0, 1, 6 /* e64 */ + %23:vmv0 = COPY %22 + %25:vrnov0 = PseudoVMERGE_VIM_M1 undef $noreg, %17, -1, %23, 1, 6 /* e64 */ %29:vr = PseudoVC_V_X_SE_M1 3, 31, %2, 1, 6 /* e64 */, implicit-def dead $sf_vcix_state, implicit $sf_vcix_state %30:vr = PseudoVMV_V_I_M1 undef $noreg, 0, 1, 6 /* e64 */, 0 BGEU %1, $x0, %bb.2 diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll index d95b026a239c9..52ddd9ab2f832 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll @@ -2018,14 +2018,14 @@ define @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32> ; CHECK-RV64-NEXT: vrgather.vv v8, v16, v24, v0.t ; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-RV64-NEXT: viota.m v16, v7 -; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: li a1, 24 -; CHECK-RV64-NEXT: mul a0, a0, a1 -; CHECK-RV64-NEXT: add a0, sp, a0 -; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; 
CHECK-RV64-NEXT: viota.m v8, v7 ; CHECK-RV64-NEXT: vmv1r.v v0, v7 ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 4 @@ -1163,12 +1157,6 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32> ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 ; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: li a1, 24 -; CHECK-RV64-NEXT: mul a0, a0, a1 -; CHECK-RV64-NEXT: add a0, sp, a0 -; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -1386,13 +1374,7 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64> ; CHECK-RV32-NEXT: vrgather.vv v8, v16, v24, v0.t ; CHECK-RV32-NEXT: addi a0, sp, 16 ; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-RV32-NEXT: viota.m v16, v7 -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: li a1, 24 -; CHECK-RV32-NEXT: mul a0, a0, a1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: viota.m v8, v7 ; CHECK-RV32-NEXT: vmv1r.v v0, v7 ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 4 @@ -1404,12 +1386,6 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64> ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 ; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: li a1, 24 -; CHECK-RV32-NEXT: mul a0, a0, a1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV32-NEXT: vrgather.vv v16, v24, v8, v0.t ; CHECK-RV32-NEXT: addi a0, sp, 16 ; CHECK-RV32-NEXT: 
vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -1471,13 +1447,7 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64> ; CHECK-RV64-NEXT: vrgather.vv v8, v16, v24, v0.t ; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-RV64-NEXT: viota.m v16, v7 -; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: li a1, 24 -; CHECK-RV64-NEXT: mul a0, a0, a1 -; CHECK-RV64-NEXT: add a0, sp, a0 -; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: viota.m v8, v7 ; CHECK-RV64-NEXT: vmv1r.v v0, v7 ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 4 @@ -1489,12 +1459,6 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64> ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 ; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: li a1, 24 -; CHECK-RV64-NEXT: mul a0, a0, a1 -; CHECK-RV64-NEXT: add a0, sp, a0 -; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll index 4f7bc5ca43ecb..d765e4c0b8f6a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -768,17 +768,17 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; RV32-NEXT: lui a2, 1044480 -; RV32-NEXT: li a1, 56 +; RV32-NEXT: lui a1, 1044480 +; 
RV32-NEXT: li a2, 56 ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: li a4, 40 ; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a1, v0.t -; RV32-NEXT: addi a2, a3, -256 -; RV32-NEXT: vand.vx v24, v8, a2, v0.t +; RV32-NEXT: vsll.vx v16, v8, a2, v0.t +; RV32-NEXT: addi a1, a3, -256 +; RV32-NEXT: vand.vx v24, v8, a1, v0.t ; RV32-NEXT: vsll.vx v24, v24, a4, v0.t ; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a3, vlenb @@ -814,9 +814,9 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vand.vx v24, v24, a1, v0.t ; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill @@ -1031,17 +1031,17 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; RV32-NEXT: lui a2, 1044480 -; RV32-NEXT: li a1, 56 +; RV32-NEXT: lui a1, 1044480 +; RV32-NEXT: li a2, 56 ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: li a4, 40 ; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a1, v0.t -; RV32-NEXT: addi a2, a3, -256 -; RV32-NEXT: vand.vx v24, v8, a2, v0.t +; RV32-NEXT: vsll.vx v16, v8, a2, v0.t +; RV32-NEXT: addi a1, a3, -256 +; RV32-NEXT: vand.vx v24, v8, a1, v0.t ; RV32-NEXT: vsll.vx v24, v24, a4, v0.t ; RV32-NEXT: vor.vv 
v16, v16, v24, v0.t ; RV32-NEXT: csrr a3, vlenb @@ -1077,9 +1077,9 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vand.vx v24, v24, a1, v0.t ; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll index 421105cfd110d..9d0d42cf754c5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll @@ -2108,27 +2108,22 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vnot.v v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: vlse64.v v8, (a4), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, 
e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 @@ -2147,29 +2142,29 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 @@ -2201,14 +2196,14 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a3, a3, a5 ; RV32-NEXT: add a3, 
sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vlse64.v v8, (a4), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -2224,14 +2219,14 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 5 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload @@ -2291,18 +2286,35 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: 
csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 @@ -2312,8 +2324,7 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2321,14 +2332,14 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -4803,27 +4814,22 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vnot.v v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) 
# Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: vlse64.v v8, (a4), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 @@ -4842,29 +4848,29 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: csrr a3, 
vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 @@ -4896,14 +4902,14 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a3, a3, a5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vlse64.v v8, (a4), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -4919,14 +4925,14 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 5 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload @@ -4986,18 +4992,35 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, 
a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 @@ -5007,8 +5030,7 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -5016,14 +5038,14 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; 
RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll index fabce69bd2d60..a5a1061842427 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll @@ -1525,8 +1525,7 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -1558,63 +1557,89 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a2), zero ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: addi a2, sp, 32 -; RV32-NEXT: vlse64.v v24, (a2), zero +; RV32-NEXT: vlse64.v v16, (a2), zero ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 5 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a2, 
vlenb ; RV32-NEXT: li a3, 40 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v24, v0.t +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vadd.vv v16, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV32-NEXT: addi a2, sp, 24 ; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a2), zero +; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: slli 
a2, a2, 3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v16, v16, v24, v0.t -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v24, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t -; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV32-NEXT: addi a2, a0, -16 ; RV32-NEXT: sltu a0, a0, a2 @@ -1622,23 +1647,22 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 4 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t +; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; 
RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsub.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -1648,20 +1672,21 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll index b7a53010ab6de..5f275da1740cb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -1769,39 +1769,54 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a4, 
vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v24, 1, v0.t -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: vsub.vv v16, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v16, v8, v0.t +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v24, v16, v24, v0.t +; RV32-NEXT: vand.vv v16, v24, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded 
Spill -; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 @@ -1809,16 +1824,16 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: addi a3, sp, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vlse64.v v8, (a4), zero ; RV32-NEXT: csrr a3, vlenb @@ -1826,7 +1841,10 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t @@ -1840,6 +1858,10 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, 
a0, -16 ; RV32-NEXT: sltu a0, a0, a3 @@ -1858,7 +1880,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -1869,18 +1892,44 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size 
Folded Reload +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 @@ -1889,7 +1938,10 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 @@ -1916,34 +1968,34 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: li a1, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a2, a0 +; RV64-NEXT: mv a4, a0 ; RV64-NEXT: bltu a0, a1, .LBB34_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a2, 16 +; RV64-NEXT: li a4, 16 ; RV64-NEXT: .LBB34_2: ; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a3, 349525 -; RV64-NEXT: lui a4, 209715 +; RV64-NEXT: lui a2, 349525 +; RV64-NEXT: lui a3, 209715 ; RV64-NEXT: lui a5, 61681 ; RV64-NEXT: lui a6, 4112 -; RV64-NEXT: addiw a3, a3, 1365 -; RV64-NEXT: addiw a4, a4, 819 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: addiw a3, a3, 819 ; RV64-NEXT: addiw a7, a5, -241 ; RV64-NEXT: addiw t0, a6, 257 -; RV64-NEXT: slli a6, a3, 32 -; RV64-NEXT: add a6, a3, a6 -; RV64-NEXT: slli a5, a4, 32 -; RV64-NEXT: add a5, a4, a5 -; RV64-NEXT: slli a3, a7, 32 -; RV64-NEXT: add a3, a7, a3 -; RV64-NEXT: slli a4, t0, 32 -; RV64-NEXT: add a4, t0, a4 +; RV64-NEXT: slli a6, a2, 32 +; RV64-NEXT: add a6, a2, a6 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a5, a3, a5 +; RV64-NEXT: slli a2, a7, 32 +; RV64-NEXT: add a2, a7, 
a2 +; RV64-NEXT: slli a3, t0, 32 +; RV64-NEXT: add a3, t0, a3 ; RV64-NEXT: addi a7, a0, -16 ; RV64-NEXT: sltu a0, a0, a7 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a7, a0, a7 ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t @@ -1956,17 +2008,17 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vmul.vx v8, v8, a4, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vmul.vx v8, v8, a3, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: addi a2, sp, 16 -; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a4, sp, 16 +; RV64-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: slli a4, a4, 3 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 16 +; RV64-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t @@ -1980,8 +2032,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vmul.vx v8, v8, a4, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vmul.vx v8, v8, a3, v0.t ; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 
@@ -3910,39 +3962,54 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v24, 1, v0.t -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: vsub.vv v16, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v16, v8, v0.t +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; 
RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v24, v16, v24, v0.t +; RV32-NEXT: vand.vv v16, v24, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 @@ -3950,16 +4017,16 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: addi a3, sp, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vlse64.v v8, (a4), zero ; RV32-NEXT: csrr a3, vlenb @@ -3967,7 +4034,10 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t @@ -3981,6 +4051,10 @@ define <32 x i64> 
@vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, a0, -16 ; RV32-NEXT: sltu a0, a0, a3 @@ -3999,7 +4073,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -4010,18 +4085,44 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; 
RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 @@ -4030,7 +4131,10 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 @@ -4057,34 +4161,34 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: li a1, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a2, a0 +; RV64-NEXT: mv a4, a0 ; RV64-NEXT: bltu a0, a1, .LBB70_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a2, 16 +; RV64-NEXT: li a4, 16 ; RV64-NEXT: .LBB70_2: ; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a3, 349525 -; RV64-NEXT: lui a4, 209715 +; RV64-NEXT: lui a2, 349525 +; RV64-NEXT: lui a3, 209715 ; RV64-NEXT: lui a5, 61681 ; RV64-NEXT: lui a6, 4112 -; RV64-NEXT: addiw a3, a3, 1365 -; RV64-NEXT: addiw a4, a4, 819 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: addiw a3, a3, 819 ; RV64-NEXT: addiw a7, a5, -241 ; RV64-NEXT: addiw t0, a6, 257 -; RV64-NEXT: slli a6, a3, 32 -; RV64-NEXT: 
add a6, a3, a6 -; RV64-NEXT: slli a5, a4, 32 -; RV64-NEXT: add a5, a4, a5 -; RV64-NEXT: slli a3, a7, 32 -; RV64-NEXT: add a3, a7, a3 -; RV64-NEXT: slli a4, t0, 32 -; RV64-NEXT: add a4, t0, a4 +; RV64-NEXT: slli a6, a2, 32 +; RV64-NEXT: add a6, a2, a6 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a5, a3, a5 +; RV64-NEXT: slli a2, a7, 32 +; RV64-NEXT: add a2, a7, a2 +; RV64-NEXT: slli a3, t0, 32 +; RV64-NEXT: add a3, t0, a3 ; RV64-NEXT: addi a7, a0, -16 ; RV64-NEXT: sltu a0, a0, a7 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a7, a0, a7 ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t @@ -4097,17 +4201,17 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vmul.vx v8, v8, a4, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vmul.vx v8, v8, a3, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: addi a2, sp, 16 -; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a4, sp, 16 +; RV64-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: slli a4, a4, 3 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 16 +; RV64-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t @@ -4121,8 +4225,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: vadd.vv v8, v16, 
v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vmul.vx v8, v8, a4, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vmul.vx v8, v8, a3, v0.t ; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll index 12f325b8e78bf..4f11e6c3c386a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll @@ -680,10 +680,10 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v16, v24, v8, v0.t +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 @@ -724,7 +724,7 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v16, (a0) +; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB25_2 @@ -733,11 +733,11 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: vmfeq.vv v7, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v8, 
v24, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v24 +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a2, -16 @@ -748,19 +748,19 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v7, v8, v8 -; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 +; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 -; CHECK-NEXT: vfmax.vv v16, v24, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: vfmax.vv v16, v16, v24 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll index 5b6759d066cd3..2e2103ad5e06d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll @@ -680,10 +680,10 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v8, (a0) # 
Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v16, v24, v8, v0.t +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 @@ -724,7 +724,7 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v16, (a0) +; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB25_2 @@ -733,11 +733,11 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: vmfeq.vv v7, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v24 +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a2, -16 @@ -748,19 +748,19 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v7, v8, v8 -; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 +; CHECK-NEXT: 
vmerge.vvm v24, v16, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 -; CHECK-NEXT: vfmin.vv v16, v24, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: vfmin.vv v16, v16, v24 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index 6a68065bb059a..1516c67bf7ecc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -655,10 +655,10 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 92 +; RV64-NEXT: li a3, 88 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xdc, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 92 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 88 * vlenb ; RV64-NEXT: addi a3, a1, 128 ; RV64-NEXT: addi a6, a1, 256 ; RV64-NEXT: li a4, 128 @@ -678,20 +678,20 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vrgather.vi v4, v8, 4 ; RV64-NEXT: vrgather.vi v20, v8, 5 ; RV64-NEXT: csrr a7, vlenb -; RV64-NEXT: li t0, 88 +; RV64-NEXT: li t0, 84 ; RV64-NEXT: mul a7, a7, t0 ; RV64-NEXT: add a7, sp, a7 ; RV64-NEXT: addi a7, a7, 16 ; RV64-NEXT: vs4r.v v20, (a7) # Unknown-size Folded Spill ; RV64-NEXT: vrgatherei16.vv v20, v8, v16 ; RV64-NEXT: csrr a7, vlenb -; RV64-NEXT: slli a7, a7, 5 +; RV64-NEXT: slli a7, a7, 6 ; RV64-NEXT: add a7, sp, a7 ; RV64-NEXT: addi a7, a7, 16 ; RV64-NEXT: vs4r.v v20, (a7) # Unknown-size Folded Spill ; RV64-NEXT: vrgatherei16.vv 
v20, v8, v17 ; RV64-NEXT: csrr a7, vlenb -; RV64-NEXT: li t0, 76 +; RV64-NEXT: li t0, 56 ; RV64-NEXT: mul a7, a7, t0 ; RV64-NEXT: add a7, sp, a7 ; RV64-NEXT: addi a7, a7, 16 @@ -712,10 +712,15 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vs4r.v v16, (a7) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 8 +; RV64-NEXT: csrr a7, vlenb +; RV64-NEXT: li t0, 40 +; RV64-NEXT: mul a7, a7, t0 +; RV64-NEXT: add a7, sp, a7 +; RV64-NEXT: addi a7, a7, 16 +; RV64-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill ; RV64-NEXT: vmv.s.x v0, a4 ; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: li a7, 48 -; RV64-NEXT: mul a4, a4, a7 +; RV64-NEXT: slli a4, a4, 5 ; RV64-NEXT: add a4, sp, a4 ; RV64-NEXT: addi a4, a4, 16 ; RV64-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill @@ -723,7 +728,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vle64.v v24, (a1) ; RV64-NEXT: vle64.v v16, (a3) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 56 +; RV64-NEXT: li a3, 76 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -738,20 +743,15 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vrgather.vi v4, v8, 2, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 80 +; RV64-NEXT: li a3, 60 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 6 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: vmv8r.v v8, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 40 +; RV64-NEXT: li a3, 48 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ 
-782,19 +782,18 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vs2r.v v16, (a3) # Unknown-size Folded Spill ; RV64-NEXT: vmv.s.x v2, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 48 -; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 88 +; RV64-NEXT: li a3, 84 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 80 +; RV64-NEXT: li a3, 40 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -802,14 +801,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vrgather.vi v24, v16, 3, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 88 +; RV64-NEXT: li a3, 84 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 56 +; RV64-NEXT: li a3, 76 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -834,87 +833,89 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: vmv.s.x v2, a3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 40 +; RV64-NEXT: li a3, 48 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vmerge.vvm v8, v16, v24, v0 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs8r.v v8, (a1) # 
Unknown-size Folded Spill -; RV64-NEXT: vmv8r.v v8, v16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 48 -; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v3, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv1r.v v0, v3 +; RV64-NEXT: vl1r.v v7, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: slli a1, a1, 6 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 80 +; RV64-NEXT: li a3, 40 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv4r.v v8, v16 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgather.vi v4, v16, 4, v0.t -; RV64-NEXT: vmv1r.v v0, v2 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vmerge.vvm v16, v8, v24, v0 +; RV64-NEXT: vrgather.vi v12, v16, 4, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: slli a1, a1, 6 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vmv1r.v v0, v3 +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a3, 76 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size 
Folded Spill +; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 80 +; RV64-NEXT: li a3, 56 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgather.vi v12, v16, 5, v0.t +; RV64-NEXT: vrgather.vi v24, v8, 5, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 76 +; RV64-NEXT: li a3, 56 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, 96 ; RV64-NEXT: li a3, 192 -; RV64-NEXT: vmv.s.x v1, a3 +; RV64-NEXT: vmv.s.x v3, a3 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.v.x v12, a1 -; RV64-NEXT: vmv1r.v v0, v1 +; RV64-NEXT: vmv.v.x v24, a1 +; RV64-NEXT: vmv1r.v v0, v3 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a3, 72 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgatherei16.vv v24, v16, v12, v0.t +; RV64-NEXT: vrgatherei16.vv v28, v8, v24, v0.t +; RV64-NEXT: vmv4r.v v16, v8 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a3, 72 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, %hi(.LCPI8_2) ; RV64-NEXT: addi a1, a1, %lo(.LCPI8_2) ; RV64-NEXT: li a3, 1040 @@ -922,73 +923,69 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi a4, a4, 1 ; RV64-NEXT: vmv.s.x v0, a3 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma 
-; RV64-NEXT: vmv.v.x v8, a4 -; RV64-NEXT: addi a3, sp, 16 -; RV64-NEXT: vs1r.v v8, (a3) # Unknown-size Folded Spill +; RV64-NEXT: vmv.v.x v5, a4 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle16.v v2, (a1) +; RV64-NEXT: vle16.v v6, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 56 -; RV64-NEXT: mul a1, a1, a3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 40 +; RV64-NEXT: li a3, 76 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmerge.vvm v24, v16, v8, v0 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a3, 48 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vmv1r.v v0, v1 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmerge.vvm v24, v8, v24, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 68 -; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 80 +; RV64-NEXT: li a3, 68 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl1r.v v20, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgatherei16.vv v24, v16, v20, v0.t +; RV64-NEXT: vrgatherei16.vv v28, v16, v5, v0.t ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a3, 68 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: 
addi a1, a1, 16 -; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill ; RV64-NEXT: addi a1, a2, -2016 ; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vrgatherei16.vv v16, v24, v6 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: li a2, 40 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v16, v24, v2 -; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 56 +; RV64-NEXT: li a2, 48 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 76 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vmerge.vvm v8, v16, v8, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 80 +; RV64-NEXT: li a2, 76 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -997,7 +994,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi a1, a1, %lo(.LCPI8_3) ; RV64-NEXT: vle16.v v8, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: li a2, 60 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload @@ -1006,16 +1004,17 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; 
RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v12, v24 +; RV64-NEXT: vmv.v.v v12, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: li a2, 60 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 88 +; RV64-NEXT: li a2, 84 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -1027,18 +1026,26 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vmv.v.v v12, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 88 +; RV64-NEXT: li a2, 84 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vmv4r.v v16, v4 -; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 40 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: vmv.v.v v16, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload @@ -1051,13 +1058,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi a1, a1, %lo(.LCPI8_5) ; RV64-NEXT: vle16.v v10, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 56 -; RV64-NEXT: mul a1, a1, a2 +; 
RV64-NEXT: slli a1, a1, 6 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs2r.v v10, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 +; RV64-NEXT: li a2, 56 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -1065,8 +1071,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: vmv.v.v v12, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 48 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload @@ -1081,14 +1086,13 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: vmv.v.v v8, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 80 +; RV64-NEXT: li a2, 76 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 56 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 6 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl2r.v v20, (a1) # Unknown-size Folded Reload @@ -1113,20 +1117,21 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vse64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 64 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 88 +; RV64-NEXT: li a3, 84 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: li a2, 60 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, 
(a0) ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 92 +; RV64-NEXT: li a1, 88 ; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: .cfi_def_cfa sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll index 69e0f45ca4b0a..266772d36ee9c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll @@ -517,19 +517,8 @@ declare <32 x double> @llvm.vp.rint.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 @@ -537,34 +526,36 @@ define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: 
vfabs.v v24, v8, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v7 @@ -574,7 +565,7 @@ define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll index e9b676c2a6c22..a91dee1cb245f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s declare <2 x i7> @llvm.vp.trunc.v2i7.v2i16(<2 x i16>, <2 x i1>, i32) @@ -576,6 +576,3 @@ define <2 x i7> @vtrunc_v2i7_v2i8(<2 x i8> %a, <2 x i1> %m, i32 zeroext %vl) { %v = call <2 x i7> @llvm.vp.trunc.v2i7.v2i8(<2 x i8> %a, <2 x i1> %m, i32 %vl) ret <2 x i7> %v } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; RV32: {{.*}} -; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll index 6c012a6750583..05254e60b65b7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -206,31 +206,40 @@ define <256 x i8> @select_evl_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a1, 128 ; CHECK-NEXT: vle8.v v24, (a0) -; CHECK-NEXT: vle8.v v8, (a1) +; CHECK-NEXT: vle8.v v16, (a1) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, 
a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v24, v24, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -240,7 +249,8 @@ define <256 x i8> @select_evl_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll index f34efa477406a..f9b5095c9af1d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -1498,66 +1498,59 @@ define @vp_floor_nxv16f64( %va, @vfmax_nxv32bf16_vv( %a, @vfmax_nxv32bf16_vv( %a, @vfmax_nxv32bf16_vv( %a, @vfmax_nxv32bf16_vv( %a, @vfmax_nxv32bf16_vv( %a, @vfmax_nxv32bf16_vv( %a, @vfmax_nxv32f16_vv( %a, @vfmax_nxv32f16_vv( %a, @vfmax_nxv32f16_vv( %a, @vfmax_vv_nxv32bf16( %va, @vfmax_vv_nxv32bf16_unmasked( @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0 +; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v7, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24 +; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 @@ -1648,14 +1648,14 @@ define @vfmax_vv_nxv16f64( %va, @vfmin_nxv32bf16_vv( %a, @vfmin_nxv32bf16_vv( %a, @vfmin_nxv32bf16_vv( %a, @vfmin_nxv32bf16_vv( %a, @vfmin_nxv32bf16_vv( %a, @vfmin_nxv32bf16_vv( %a, @vfmin_nxv32f16_vv( %a, @vfmin_nxv32f16_vv( %a, @vfmin_nxv32f16_vv( %a, @vfmin_vv_nxv32bf16( %va, @vfmin_vv_nxv32bf16_unmasked( @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v7, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv 
v7, v24, v24 +; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 @@ -1648,14 +1648,14 @@ define @vfmin_vv_nxv16f64( %va, @fshr_v16i64( %a, @fshr_v16i64( %a, @fshr_v16i64( %a, @fshr_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @vp_nearbyint_nxv16f64( %va, ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI44_0) ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v25, v0, a3 +; CHECK-NEXT: vslidedown.vx v6, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; 
CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: frflags a2 -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsflags a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; 
CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll index 4c1500797a60a..39744dcecd718 100644 --- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll @@ -1498,66 +1498,59 @@ define @vp_round_nxv16f64( %va, @vp_roundeven_nxv16f64( %va, ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI44_0) ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v25, v0, a3 +; CHECK-NEXT: vslidedown.vx v6, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 0 -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; 
CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll index f98d9a9ae940a..1300d8cd64ebb 100644 
--- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -1498,66 +1498,59 @@ define @vp_roundtozero_nxv16f64( %v ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI44_0) ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v25, v0, a3 +; CHECK-NEXT: vslidedown.vx v6, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 1 -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; 
CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll index f590191a92cdd..90d798b167cfc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll @@ -299,15 +299,15 @@ define @splice_nxv64i1_offset_negone( %a, < ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vmv.v.i 
v24, 0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 +; CHECK-NEXT: vmerge.vim v16, v24, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v24, 1, v0 ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vslideup.vi v8, v24, 1 +; CHECK-NEXT: vslideup.vi v8, v16, 1 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll index 952758ccc24e7..9e78bbdc4f441 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -429,22 +429,26 @@ define @vfadd_vv_nxv32bf16( %va, @vfadd_vv_nxv32f16( %va, @vfdiv_vv_nxv32bf16( %va, @vfdiv_vv_nxv32f16( %va, @vfma_vv_nxv32bf16( %va, @vfma_vv_nxv32bf16( %va, @vfma_vv_nxv32bf16( %va, @vfma_vv_nxv32bf16( %va, @vfma_vf_nxv32bf16( %va, bfl ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: vmv8r.v v24, v16 +; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: fmv.x.h a2, fa0 ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: slli a1, a3, 1 @@ -872,12 +871,12 @@ define @vfma_vf_nxv32bf16( %va, bfl ; CHECK-NEXT: addi a4, a4, 16 ; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t ; CHECK-NEXT: csrr a4, vlenb ; CHECK-NEXT: slli a4, a4, 3 ; CHECK-NEXT: add a4, sp, a4 ; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a4, vlenb ; CHECK-NEXT: slli a4, a4, 3 ; CHECK-NEXT: mv a5, a4 @@ -885,33 +884,33 @@ define @vfma_vf_nxv32bf16( 
%va, bfl ; CHECK-NEXT: add a4, a4, a5 ; CHECK-NEXT: add a4, sp, a4 ; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t +; CHECK-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28, v0.t ; CHECK-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv.v.x v8, a2 +; CHECK-NEXT: vmv.v.x v24, a2 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 5 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 5 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t +; CHECK-NEXT: vfmadd.vv v24, v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v20, v8, v0.t +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t ; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 @@ -936,35 +935,36 @@ define @vfma_vf_nxv32bf16( %va, bfl ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; 
CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24, v0.t +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8, v0.t +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v24, v16, v8, v0.t +; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: mv a1, a0 @@ -995,16 +995,14 @@ define @vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: vmv8r.v v24, v16 -; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: csrr a2, 
vlenb -; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vmv.v.x v24, a1 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: sub a3, a0, a1 @@ -1017,21 +1015,21 @@ define @vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: slli a3, a3, 4 ; CHECK-NEXT: add a3, sp, a3 ; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20, v0.t ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 5 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t -; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28, v0.t +; CHECK-NEXT: vmv4r.v v24, v8 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: mv a3, a2 @@ -1039,19 +1037,19 @@ define @vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: add a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; 
CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t +; CHECK-NEXT: vfmadd.vv v24, v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v20, v8, v0.t +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t ; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 @@ -1061,9 +1059,9 @@ define @vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8, v0.t +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 @@ -1076,14 +1074,14 @@ define @vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8, v0.t +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: mv a1, a0 @@ -1091,12 +1089,12 @@ define @vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: 
add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: mv a1, a0 @@ -1104,13 +1102,14 @@ define @vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v24, v16, v8, v0.t +; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t +; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: mv a1, a0 @@ -2146,7 +2145,7 @@ define @vfma_vv_nxv32f16( %va, @vfma_vv_nxv32f16( %va, @vfma_vv_nxv32f16( %va, @vfma_vv_nxv32f16( %va, @vfma_vf_nxv32f16( %va, half %b, ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v7, v0 -; ZVFHMIN-NEXT: vmv8r.v v24, v16 +; ZVFHMIN-NEXT: vmv8r.v v24, v8 ; ZVFHMIN-NEXT: fmv.x.h a2, fa0 ; ZVFHMIN-NEXT: csrr a3, vlenb ; ZVFHMIN-NEXT: slli a1, a3, 1 @@ -2411,12 +2409,12 @@ define @vfma_vf_nxv32f16( %va, half %b, ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 ; ZVFHMIN-NEXT: mv a5, a4 @@ -2424,33 +2422,33 @@ define @vfma_vf_nxv32f16( %va, half %b, ; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v8, a2 +; ZVFHMIN-NEXT: vmv.v.x v24, a2 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: 
vfncvt.f.f.w v20, v8, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB68_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 @@ -2475,35 +2473,36 @@ define @vfma_vf_nxv32f16( %va, half %b, ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t +; ZVFHMIN-NEXT: vmv.v.v v16, v8 ; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: 
vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -2540,16 +2539,14 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v7, v0 -; ZVFHMIN-NEXT: vmv8r.v v24, v16 -; ZVFHMIN-NEXT: vmv8r.v v16, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -2562,21 +2559,21 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: slli a3, a3, 4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t -; ZVFHMIN-NEXT: vmv4r.v v8, v16 +; ZVFHMIN-NEXT: vl8r.v v24, (a2) 
# Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: vmv4r.v v24, v8 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 @@ -2584,19 +2581,19 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB69_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 @@ -2606,9 +2603,9 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 @@ -2621,14 +2618,14 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: 
addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -2636,12 +2633,12 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -2649,13 +2646,14 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vmv.v.v v16, v8 ; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb 
; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -8425,7 +8423,7 @@ define @vfmsub_vv_nxv32f16( %va, @vfmsub_vv_nxv32f16( %va, @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vl8re16.v v24, (a0) +; ZVFHMIN-NEXT: vsetvli a2, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmv8r.v v24, v8 +; ZVFHMIN-NEXT: vl8re16.v v8, (a0) ; ZVFHMIN-NEXT: lui a2, 8 -; ZVFHMIN-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a3, vlenb ; ZVFHMIN-NEXT: slli a0, a3, 1 @@ -8517,25 +8516,25 @@ define @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a3 ; ZVFHMIN-NEXT: sltu a3, a1, a4 ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v16, v24, a2 +; ZVFHMIN-NEXT: vxor.vx v16, v8, a2 ; ZVFHMIN-NEXT: addi a3, a3, -1 ; ZVFHMIN-NEXT: and a3, a3, a4 +; ZVFHMIN-NEXT: vmv4r.v v8, v16 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv4r.v v16, v8 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t ; ZVFHMIN-NEXT: csrr 
a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 @@ -8544,32 +8543,35 @@ define @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t -; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: bltu a1, a0, .LBB281_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a0 ; ZVFHMIN-NEXT: .LBB281_2: +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 -; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -8577,12 +8579,15 @@ define 
@vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16 +; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb @@ -8617,10 +8622,11 @@ define @vfmsub_vf_nxv32f16( %va, half % ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v7, v0 +; ZVFHMIN-NEXT: vmv8r.v v24, v8 ; ZVFHMIN-NEXT: fmv.x.h a2, fa0 ; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: vxor.vx v24, v16, a1, v0.t +; ZVFHMIN-NEXT: vxor.vx v8, v16, a1, v0.t ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 ; ZVFHMIN-NEXT: sub a4, a0, a1 @@ -8633,9 +8639,9 @@ define @vfmsub_vf_nxv32f16( %va, half % ; ZVFHMIN-NEXT: slli a4, a4, 4 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 ; ZVFHMIN-NEXT: add a4, sp, a4 @@ -8648,33 +8654,33 @@ define 
@vfmsub_vf_nxv32f16( %va, half % ; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v8, a2 +; ZVFHMIN-NEXT: vmv.v.x v24, a2 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB282_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 @@ -8699,35 +8705,36 @@ define @vfmsub_vf_nxv32f16( %va, half % ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, 
v24, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t +; ZVFHMIN-NEXT: vmv.v.v v16, v8 ; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -9395,24 +9402,24 @@ define @vfnmadd_vv_nxv32f16_commuted( % ; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, 
v12, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t ; ZVFHMIN-NEXT: vmv.v.v v8, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t @@ -9459,27 +9466,28 @@ define @vfnmadd_vv_nxv32f16_commuted( % ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: 
vmv.v.v v16, v8 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: mv a1, a0 @@ -9763,8 +9771,8 @@ define @vfnmadd_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a2, v0.t -; ZVFHMIN-NEXT: vxor.vx v24, v16, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v24, v8, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 ; ZVFHMIN-NEXT: sub a2, a0, a1 @@ -9777,14 +9785,14 @@ define @vfnmadd_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: slli a3, a3, 4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 @@ -9792,25 +9800,25 @@ define @vfnmadd_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v16, v28, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB290_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 @@ -9846,24 +9854,25 @@ define @vfnmadd_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, 
ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t +; ZVFHMIN-NEXT: vmv.v.v v16, v8 ; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -9912,8 +9921,8 @@ define @vfnmadd_vf_nxv32f16_commute( %v ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a2, v0.t -; ZVFHMIN-NEXT: vxor.vx v24, v16, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v24, v8, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 ; ZVFHMIN-NEXT: sub a2, a0, a1 @@ -9926,14 +9935,14 @@ define @vfnmadd_vf_nxv32f16_commute( %v ; ZVFHMIN-NEXT: slli a3, a3, 4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 @@ -9941,23 +9950,23 @@ define @vfnmadd_vf_nxv32f16_commute( %v ; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size 
Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB291_2 @@ -10070,34 +10079,36 @@ define @vfnmadd_vf_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: addi a3, a3, -1 ; ZVFHMIN-NEXT: and a3, a3, a4 ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 +; ZVFHMIN-NEXT: slli a4, a4, 4 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t -; ZVFHMIN-NEXT: addi a4, sp, 16 +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, 
sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-NEXT: vmv4r.v v16, v8 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t -; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t @@ -10108,30 +10119,33 @@ define @vfnmadd_vf_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB292_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: add a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: mv a1, a0 +; ZVFHMIN-NEXT: slli 
a0, a0, 1 +; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 +; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 -; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16 @@ -10293,7 +10307,6 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v7, v0 -; ZVFHMIN-NEXT: vmv8r.v v24, v16 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: add a1, sp, a1 @@ -10302,11 +10315,11 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: lui a2, 8 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: vmv.v.x v16, a1 +; ZVFHMIN-NEXT: vmv.v.x v24, a1 ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 -; ZVFHMIN-NEXT: vxor.vx v8, v16, a2, v0.t -; ZVFHMIN-NEXT: vxor.vx v24, v24, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v8, v24, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t ; ZVFHMIN-NEXT: sub a2, a0, a1 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 @@ -10317,15 +10330,14 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: slli a3, a3, 4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv4r.v v16, v8 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 @@ -10333,7 +10345,7 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 @@ -10376,14 +10388,14 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -10391,12 +10403,12 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, 
sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -10404,9 +10416,9 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t ; ZVFHMIN-NEXT: vmv.v.v v16, v8 ; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -10450,7 +10462,14 @@ define @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmsub_vv_nxv32f16_commuted( % ; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, 
(a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t ; ZVFHMIN-NEXT: vmv.v.v v8, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t @@ -11148,27 +11176,28 @@ define @vfnmsub_vv_nxv32f16_commuted( % ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vmv.v.v v16, v8 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: mv a1, a0 @@ -11446,7 +11475,7 @@ define @vfnmsub_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: fmv.x.h a2, fa0 ; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; 
ZVFHMIN-NEXT: vxor.vx v16, v8, a1, v0.t ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 ; ZVFHMIN-NEXT: sub a4, a0, a1 @@ -11459,15 +11488,14 @@ define @vfnmsub_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: slli a4, a4, 4 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv8r.v v8, v24 +; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 ; ZVFHMIN-NEXT: mv a5, a4 @@ -11476,32 +11504,32 @@ define @vfnmsub_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v8, a2 +; ZVFHMIN-NEXT: vmv.v.x v24, a2 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; 
ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB302_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 @@ -11526,35 +11554,36 @@ define @vfnmsub_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: csrr a0, vlenb ; 
ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vmv.v.v v16, v8 ; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -11611,7 +11640,6 @@ define @vfnmsub_vf_nxv32f16_commute( %v ; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t -; ZVFHMIN-NEXT: vmv4r.v v8, v16 ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 ; ZVFHMIN-NEXT: mv a5, a4 @@ -11619,7 +11647,7 @@ define @vfnmsub_vf_nxv32f16_commute( %v ; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 @@ -11627,26 +11655,26 @@ define @vfnmsub_vf_nxv32f16_commute( %v ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v8, a2 +; ZVFHMIN-NEXT: vmv.v.x v16, a2 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 ; ZVFHMIN-NEXT: add a2, sp, a2 ; 
ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb @@ -12137,14 +12165,14 @@ define @vfnmsub_vf_nxv32f16_neg_splat_commute( @vfnmsub_vf_nxv32f16_neg_splat_commute( @vfnmsub_vf_nxv32f16_neg_splat_commute( @vfnmsub_vf_nxv32f16_neg_splat_commute( @vfnmsub_vf_nxv32f16_neg_splat_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfmax_vv_nxv32bf16( %va, @vfmax_vv_nxv32f16( %va, @vfmin_vv_nxv32bf16( %va, @vfmin_vv_nxv32f16( %va, @vfmul_vv_nxv32f16( %va, @llvm.vp.fptosi.nxv32i16.nxv32f32( @vfptosi_nxv32i16_nxv32f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptosi_nxv32i16_nxv32f32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; 
CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: slli a1, a1, 1 @@ -508,24 +500,16 @@ define @vfptosi_nxv32i16_nxv32f32( %va, ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v20, v24, v0.t +; CHECK-NEXT: vfncvt.rtz.x.f.w v28, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB34_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB34_2: -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8, v0.t -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: vfncvt.rtz.x.f.w v24, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call @llvm.vp.fptosi.nxv32i16.nxv32f32( %va, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll index 70f4e6f4ddfb7..a11139fea9e5b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll @@ -490,16 +490,8 @@ declare @llvm.vp.fptoui.nxv32i16.nxv32f32( @vfptoui_nxv32i16_nxv32f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptoui_nxv32i16_nxv32f32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v 
v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: slli a1, a1, 1 @@ -508,24 +500,16 @@ define @vfptoui_nxv32i16_nxv32f32( %va, ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v20, v24, v0.t +; CHECK-NEXT: vfncvt.rtz.xu.f.w v28, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB34_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB34_2: -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8, v0.t -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: vfncvt.rtz.xu.f.w v24, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call @llvm.vp.fptoui.nxv32i16.nxv32f32( %va, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll index 98a6ae59ea67a..63156e1399293 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll @@ -157,13 +157,13 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 3 +; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: srli a5, a1, 2 ; CHECK-NEXT: slli a6, a1, 3 -; CHECK-NEXT: slli a3, a1, 1 +; CHECK-NEXT: slli a4, a1, 1 ; CHECK-NEXT: vslidedown.vx v16, v0, a5 ; CHECK-NEXT: add a6, a0, a6 -; CHECK-NEXT: sub a5, a2, a3 +; CHECK-NEXT: sub a5, a2, a4 ; CHECK-NEXT: vl8re64.v v24, (a6) ; CHECK-NEXT: sltu a6, a2, a5 ; CHECK-NEXT: 
addi a6, a6, -1 @@ -173,7 +173,7 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: addi a7, a7, -1 ; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v16, a4 +; CHECK-NEXT: vslidedown.vx v0, v16, a3 ; CHECK-NEXT: and a0, a7, a6 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t @@ -183,12 +183,12 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: .LBB8_2: ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v6, v7, a4 +; CHECK-NEXT: vslidedown.vx v6, v7, a3 ; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t -; CHECK-NEXT: bltu a2, a3, .LBB8_4 +; CHECK-NEXT: bltu a2, a4, .LBB8_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: mv a2, a4 ; CHECK-NEXT: .LBB8_4: ; CHECK-NEXT: sub a0, a2, a1 ; CHECK-NEXT: sltu a3, a2, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll index da1d5cb117913..059408a1c9c3f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll @@ -391,22 +391,26 @@ define @vfsub_vv_nxv32bf16( %va, @vfsub_vv_nxv32f16( %va, @between_inline_asm( %a, %b, %mask, ptr %p) { +; CHECK-LABEL: between_inline_asm: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: #APP +; CHECK-NEXT: vadd.vv v0, v8, v9 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vadd.vv v9, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: #APP +; CHECK-NEXT: vadd.vv v8, v8, v0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: vs1r.v v9, (a0) +; CHECK-NEXT: ret + %asm1 = tail call asm "vadd.vv $0, $1, $2", "={v0},^vr,^vr"( %a, %b) + %x = call @llvm.riscv.vadd.mask( poison, %a, %b, %mask, i64 -1, i64 0) + store %x, ptr %p + %asm2 = tail call 
asm "vadd.vv $0, $1, $2", "=^vr,^vr,{v0}"( %a, %asm1) + ret %asm2 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll index c3cfc2eabf553..4cd77185e6930 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll @@ -168,17 +168,17 @@ define @vpmerge_nxv64i1( %va, @llvm.vp.merge.nxv64i1( %m, %va, %vb, i32 %evl) ret %v @@ -217,17 +217,17 @@ define @vpmerge_nxv128i1( %va, @llvm.vp.merge.nxv128i1( %m, %va, %vb, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll index 7b6dd5399b275..c041a165a594f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll @@ -507,16 +507,8 @@ declare @llvm.vp.sitofp.nxv32f16.nxv32i32( @vsitofp_nxv32f16_nxv32i32( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vsitofp_nxv32f16_nxv32i32: ; ZVFH: # %bb.0: -; ZVFH-NEXT: addi sp, sp, -16 -; ZVFH-NEXT: .cfi_def_cfa_offset 16 -; ZVFH-NEXT: csrr a1, vlenb -; ZVFH-NEXT: slli a1, a1, 3 -; ZVFH-NEXT: sub sp, sp, a1 -; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFH-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; ZVFH-NEXT: vmv1r.v v7, v0 -; ZVFH-NEXT: addi a1, sp, 16 -; ZVFH-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; ZVFH-NEXT: vmv1r.v v24, v0 ; ZVFH-NEXT: csrr a1, vlenb ; ZVFH-NEXT: srli a2, a1, 2 ; ZVFH-NEXT: slli a1, a1, 1 @@ -525,24 +517,16 @@ define @vsitofp_nxv32f16_nxv32i32( %va, ; ZVFH-NEXT: sltu a3, a0, a2 ; ZVFH-NEXT: addi a3, a3, -1 ; ZVFH-NEXT: and a2, a3, a2 -; ZVFH-NEXT: addi a3, sp, 16 -; ZVFH-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; ZVFH-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFH-NEXT: vfncvt.f.x.w v20, v24, v0.t +; ZVFH-NEXT: vfncvt.f.x.w v28, v16, v0.t ; ZVFH-NEXT: bltu a0, a1, .LBB34_2 ; ZVFH-NEXT: # %bb.1: ; ZVFH-NEXT: mv a0, a1 ; ZVFH-NEXT: .LBB34_2: -; 
ZVFH-NEXT: vmv1r.v v0, v7 +; ZVFH-NEXT: vmv1r.v v0, v24 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vfncvt.f.x.w v16, v8, v0.t -; ZVFH-NEXT: vmv8r.v v8, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add sp, sp, a0 -; ZVFH-NEXT: .cfi_def_cfa sp, 16 -; ZVFH-NEXT: addi sp, sp, 16 -; ZVFH-NEXT: .cfi_def_cfa_offset 0 +; ZVFH-NEXT: vfncvt.f.x.w v24, v8, v0.t +; ZVFH-NEXT: vmv8r.v v8, v24 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vsitofp_nxv32f16_nxv32i32: diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll index 895d1d8c0ab31..ebf8d5eeb40bc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll @@ -591,22 +591,22 @@ define @vssub_vi_nxv128i8( %va, @llvm.vp.ssub.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll index 52ca9ae174fdd..d54901c93d53c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll @@ -589,22 +589,22 @@ define @vssubu_vi_nxv128i8( %va, @llvm.vp.usub.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll index aabafae8a2475..fd5bf4ebcede8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll @@ -298,13 +298,13 @@ define @vtrunc_nxv32i64_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, @llvm.vp.uitofp.nxv32f16.nxv32i32( @vuitofp_nxv32f16_nxv32i32( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vuitofp_nxv32f16_nxv32i32: ; ZVFH: # %bb.0: -; ZVFH-NEXT: addi sp, sp, -16 -; ZVFH-NEXT: .cfi_def_cfa_offset 16 -; ZVFH-NEXT: csrr a1, vlenb -; ZVFH-NEXT: slli a1, a1, 3 -; ZVFH-NEXT: sub sp, sp, a1 -; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * 
vlenb ; ZVFH-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; ZVFH-NEXT: vmv1r.v v7, v0 -; ZVFH-NEXT: addi a1, sp, 16 -; ZVFH-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; ZVFH-NEXT: vmv1r.v v24, v0 ; ZVFH-NEXT: csrr a1, vlenb ; ZVFH-NEXT: srli a2, a1, 2 ; ZVFH-NEXT: slli a1, a1, 1 @@ -517,24 +509,16 @@ define @vuitofp_nxv32f16_nxv32i32( %va, ; ZVFH-NEXT: sltu a3, a0, a2 ; ZVFH-NEXT: addi a3, a3, -1 ; ZVFH-NEXT: and a2, a3, a2 -; ZVFH-NEXT: addi a3, sp, 16 -; ZVFH-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; ZVFH-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFH-NEXT: vfncvt.f.xu.w v20, v24, v0.t +; ZVFH-NEXT: vfncvt.f.xu.w v28, v16, v0.t ; ZVFH-NEXT: bltu a0, a1, .LBB34_2 ; ZVFH-NEXT: # %bb.1: ; ZVFH-NEXT: mv a0, a1 ; ZVFH-NEXT: .LBB34_2: -; ZVFH-NEXT: vmv1r.v v0, v7 +; ZVFH-NEXT: vmv1r.v v0, v24 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vfncvt.f.xu.w v16, v8, v0.t -; ZVFH-NEXT: vmv8r.v v8, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add sp, sp, a0 -; ZVFH-NEXT: .cfi_def_cfa sp, 16 -; ZVFH-NEXT: addi sp, sp, 16 -; ZVFH-NEXT: .cfi_def_cfa_offset 0 +; ZVFH-NEXT: vfncvt.f.xu.w v24, v8, v0.t +; ZVFH-NEXT: vmv8r.v v8, v24 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vuitofp_nxv32f16_nxv32i32: From a9b5cf148fb714356b642d1f2a6d1b0b5e868c8d Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 10 Feb 2025 11:36:18 +0800 Subject: [PATCH 4/4] Remove madeChange assignment, fold def into if statement, clarify comment about how the register coalescer produces unallocatable instructions --- .../lib/Target/RISCV/RISCVVMV0Elimination.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp b/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp index f793b3a3b42f9..a2daa19614d0a 100644 --- a/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp +++ b/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp @@ -10,12 +10,14 @@ // register in the singleton vmv0 register class 
instead of copying them to $v0 // straight away, to make optimizing masks easier. // -// However the register allocator struggles with singleton register classes and -// will run into errors like "ran out of registers during register allocation in -// function" +// However register coalescing may end up coleascing copies into vmv0, resulting +// in instructions with multiple uses of vmv0 that the register allocator can't +// allocate: // -// This pass runs just before register allocation and replaces any uses* of vmv0 -// with copies to $v0. +// %x:vrnov0 = PseudoVADD_VV_M1_MASK %0:vrnov0, %1:vr, %2:vmv0, %3:vmv0, ... +// +// To avoid this, this pass replaces any uses* of vmv0 with copies to $v0 before +// register coalescing and allocation: // // %x:vrnov0 = PseudoVADD_VV_M1_MASK %0:vrnov0, %1:vr, %2:vr, %3:vmv0, ... // -> @@ -128,8 +130,8 @@ bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction &MF) { Src.isVirtual() && "vmv0 use in unexpected form"); // Peek through a single copy to match what isel does. - MachineInstr *SrcMI = MRI.getVRegDef(Src); - if (SrcMI->isCopy() && SrcMI->getOperand(1).getReg().isVirtual()) { + if (MachineInstr *SrcMI = MRI.getVRegDef(Src); + SrcMI->isCopy() && SrcMI->getOperand(1).getReg().isVirtual()) { assert(SrcMI->getOperand(1).getSubReg() == RISCV::NoSubRegister); Src = SrcMI->getOperand(1).getReg(); } @@ -161,11 +163,10 @@ bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction &MF) { MI.isInlineAsm() || MRI.getVRegDef(MO.getReg())->isInlineAsm() && "Non-inline-asm use of vmv0 left behind"); - MadeChange = true; } } } } - return MadeChange; + return true; }