diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c440df5a3e638..1192501e7799c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6963,7 +6963,7 @@ static bool hasPassthruOp(unsigned Opcode) {
          Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
          "not a RISC-V target specific op");
   static_assert(
-      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 133 &&
+      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 134 &&
       RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
       "adding target specific op should update this function");
   if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
@@ -6987,7 +6987,7 @@ static bool hasMaskOp(unsigned Opcode) {
          Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
          "not a RISC-V target specific op");
   static_assert(
-      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 133 &&
+      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 134 &&
       RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
       "adding target specific op should update this function");
   if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
@@ -9595,6 +9595,13 @@ getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
   return SmallerVT;
 }
 
+static bool isValidVisniInsertExtractIndex(SDValue Idx) {
+  auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
+  if (!IdxC || isNullConstant(Idx))
+    return false;
+  return isUInt<5>(IdxC->getZExtValue());
+}
+
 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
 // first position of a vector, and that vector is slid up to the insert index.
 // By limiting the active vector length to index+1 and merging with the
@@ -9705,6 +9712,23 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
         return Vec;
       return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
     }
+
+    // Use ri.vinsert.v.x if available.
+    if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
+        isValidVisniInsertExtractIndex(Idx)) {
+      // Tail policy applies to elements past VLMAX (by assumption Idx < VLMAX)
+      SDValue PolicyOp =
+          DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
+      Vec = DAG.getNode(RISCVISD::RI_VINSERT_VL, DL, ContainerVT, Vec, Val, Idx,
+                        VL, PolicyOp);
+      if (AlignedIdx)
+        Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+                          Vec, AlignedIdx);
+      if (!VecVT.isFixedLengthVector())
+        return Vec;
+      return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
+    }
+
     ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
   } else {
     // On RV32, i64-element vectors must be specially handled to place the
@@ -9904,6 +9928,14 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
     }
   }
 
+  // Use ri.vextract.x.v if available.
+  // TODO: Avoid index 0 and just use the vmv.x.s
+  if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
+      isValidVisniInsertExtractIndex(Idx)) {
+    SDValue Elt = DAG.getNode(RISCVISD::RI_VEXTRACT, DL, XLenVT, Vec, Idx);
+    return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt);
+  }
+
   // If after narrowing, the required slide is still greater than LMUL2,
   // fallback to generic expansion and go through the stack.
This is done // for a subtle reason: extracting *all* elements out of a vector is @@ -22321,12 +22353,14 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VZEXT_VL) NODE_NAME_CASE(VCPOP_VL) NODE_NAME_CASE(VFIRST_VL) + NODE_NAME_CASE(RI_VINSERT_VL) NODE_NAME_CASE(RI_VZIPEVEN_VL) NODE_NAME_CASE(RI_VZIPODD_VL) NODE_NAME_CASE(RI_VZIP2A_VL) NODE_NAME_CASE(RI_VZIP2B_VL) NODE_NAME_CASE(RI_VUNZIP2A_VL) NODE_NAME_CASE(RI_VUNZIP2B_VL) + NODE_NAME_CASE(RI_VEXTRACT) NODE_NAME_CASE(READ_CSR) NODE_NAME_CASE(WRITE_CSR) NODE_NAME_CASE(SWAP_CSR) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 6e50ab8e1f296..ba24a0c324f51 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -404,6 +404,10 @@ enum NodeType : unsigned { // vfirst.m with additional mask and VL operands. VFIRST_VL, + // XRivosVisni + // VINSERT matches the semantics of ri.vinsert.v.x. It carries a VL operand. + RI_VINSERT_VL, + // XRivosVizip RI_VZIPEVEN_VL, RI_VZIPODD_VL, @@ -414,6 +418,12 @@ enum NodeType : unsigned { LAST_VL_VECTOR_OP = RI_VUNZIP2B_VL, + // XRivosVisni + // VEXTRACT matches the semantics of ri.vextract.x.v. The result is always + // XLenVT sign extended from the vector element size. VEXTRACT does *not* + // have a VL operand. + RI_VEXTRACT, + // Read VLENB CSR READ_VLENB, // Reads value of CSR. diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 2247610c21ffb..6be792e1bb8cd 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -94,6 +94,10 @@ static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) { } } +static bool isVExtractInstr(const MachineInstr &MI) { + return RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::RI_VEXTRACT; +} + static bool isScalarExtractInstr(const MachineInstr &MI) { switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { default: @@ -538,6 +542,12 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) { Res.MaskPolicy = false; } + if (isVExtractInstr(MI)) { + assert(!RISCVII::hasVLOp(TSFlags)); + // TODO: LMUL can be any larger value (without cost) + Res.TailPolicy = false; + } + return Res; } @@ -1085,7 +1095,7 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const { InstrInfo.setAVLRegDef(VNI, VLOp.getReg()); } } else { - assert(isScalarExtractInstr(MI)); + assert(isScalarExtractInstr(MI) || isVExtractInstr(MI)); // Pick a random value for state tracking purposes, will be ignored via // the demanded fields mechanism InstrInfo.setAVLImm(1); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td index 110dfdff7f29a..eb594c876bd12 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td @@ -128,3 +128,57 @@ def RI_VEXTRACT : CustomRivosXVI<0b010111, OPMVV, (outs GPR:$rd), (ins VR:$vs2, uimm5:$imm), "ri.vextract.x.v", "$rd, $vs2, $imm">; } + + +def ri_vextract : SDNode<"RISCVISD::RI_VEXTRACT", + SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<1>, + SDTCisInt<2>, + SDTCisInt<1>]>>; + +def ri_vinsert_vl : SDNode<"RISCVISD::RI_VINSERT_VL", + SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>, + SDTCisInt<0>, + SDTCisVT<2, XLenVT>, + SDTCisVT<3, XLenVT>, + SDTCisVT<4, XLenVT>]>>; + +let Predicates = [HasVendorXRivosVisni], mayLoad = 0, mayStore = 0, + hasSideEffects = 0, HasSEWOp = 1 in +foreach m = 
MxList in {
+  defvar mx = m.MX;
+  let VLMul = m.value in {
+    let BaseInstr = RI_VEXTRACT in
+    def PseudoRI_VEXTRACT_ # mx :
+      Pseudo<(outs GPR:$rd), (ins m.vrclass:$rs2, uimm5:$idx, ixlenimm:$sew),
+             []>,
+      RISCVVPseudo;
+
+    let HasVLOp = 1, BaseInstr = RI_VINSERT, HasVecPolicyOp = 1,
+        Constraints = "$rd = $rs1" in
+    def PseudoRI_VINSERT_ # mx :
+      Pseudo<(outs m.vrclass:$rd),
+             (ins m.vrclass:$rs1, GPR:$rs2, uimm5:$idx, AVL:$vl,
+                  ixlenimm:$sew, ixlenimm:$policy),
+             []>,
+      RISCVVPseudo;
+  }
+}
+
+
+
+foreach vti = AllIntegerVectors in
+  let Predicates = GetVTypePredicates<vti>.Predicates in {
+    def : Pat<(XLenVT (ri_vextract (vti.Vector vti.RegClass:$vs2), uimm5:$imm)),
+              (!cast<Instruction>("PseudoRI_VEXTRACT_" # vti.LMul.MX)
+               $vs2, uimm5:$imm, vti.Log2SEW)>;
+
+    def : Pat<(vti.Vector (ri_vinsert_vl (vti.Vector vti.RegClass:$merge),
+                                         vti.ScalarRegClass:$rs1,
+                                         uimm5:$imm,
+                                         VLOpFrag,
+                                         (XLenVT timm:$policy))),
+              (!cast<Instruction>("PseudoRI_VINSERT_" # vti.LMul.MX)
+               $merge, vti.ScalarRegClass:$rs1, uimm5:$imm,
+               GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
+
+  }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
index 7e45136372b6c..75732fe2f7e65 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -7,6 +7,8 @@
 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32,RV32M
 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64,RV64M
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+m,+experimental-xrivosvisni -verify-machineinstrs < %s | FileCheck %s --check-prefixes=VISNI
+
 
 define i8 @extractelt_v16i8(<16 x i8> %a) nounwind {
 ; CHECK-LABEL: extractelt_v16i8:
 ; CHECK: # %bb.0:
@@ -14,6 +16,12 @@ define i8 @extractelt_v16i8(<16 x i8> %a) nounwind {
 ; CHECK-NEXT: vslidedown.vi v8, v8, 7
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v16i8:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 7
+; VISNI-NEXT: ret
   %b = extractelement <16 x i8> %a, i32 7
   ret i8 %b
 }
@@ -25,6 +33,12 @@ define i16 @extractelt_v8i16(<8 x i16> %a) nounwind {
 ; CHECK-NEXT: vslidedown.vi v8, v8, 7
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v8i16:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 7
+; VISNI-NEXT: ret
   %b = extractelement <8 x i16> %a, i32 7
   ret i16 %b
 }
@@ -36,6 +50,12 @@ define i32 @extractelt_v4i32(<4 x i32> %a) nounwind {
 ; CHECK-NEXT: vslidedown.vi v8, v8, 2
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v4i32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 2
+; VISNI-NEXT: ret
   %b = extractelement <4 x i32> %a, i32 2
   ret i32 %b
 }
@@ -55,6 +75,12 @@ define i64 @extractelt_v2i64(<2 x i64> %a) nounwind {
 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v2i64:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; VISNI-NEXT: vmv.x.s a0, v8
+; VISNI-NEXT: ret
   %b = extractelement <2 x i64> %a, i32 0
   ret i64 %b
 }
@@ -67,6 +93,13 @@ define bfloat 
@extractelt_v8bf16(<8 x bfloat> %a) nounwind { ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: fmv.h.x fa0, a0 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v8bf16: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; VISNI-NEXT: ri.vextract.x.v a0, v8, 7 +; VISNI-NEXT: fmv.h.x fa0, a0 +; VISNI-NEXT: ret %b = extractelement <8 x bfloat> %a, i32 7 ret bfloat %b } @@ -86,6 +119,13 @@ define half @extractelt_v8f16(<8 x half> %a) nounwind { ; ZVFHMIN-NEXT: vmv.x.s a0, v8 ; ZVFHMIN-NEXT: fmv.h.x fa0, a0 ; ZVFHMIN-NEXT: ret +; +; VISNI-LABEL: extractelt_v8f16: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; VISNI-NEXT: vslidedown.vi v8, v8, 7 +; VISNI-NEXT: vfmv.f.s fa0, v8 +; VISNI-NEXT: ret %b = extractelement <8 x half> %a, i32 7 ret half %b } @@ -97,6 +137,13 @@ define float @extractelt_v4f32(<4 x float> %a) nounwind { ; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v4f32: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; VISNI-NEXT: vslidedown.vi v8, v8, 2 +; VISNI-NEXT: vfmv.f.s fa0, v8 +; VISNI-NEXT: ret %b = extractelement <4 x float> %a, i32 2 ret float %b } @@ -107,6 +154,12 @@ define double @extractelt_v2f64(<2 x double> %a) nounwind { ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v2f64: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; VISNI-NEXT: vfmv.f.s fa0, v8 +; VISNI-NEXT: ret %b = extractelement <2 x double> %a, i32 0 ret double %b } @@ -118,6 +171,12 @@ define i8 @extractelt_v32i8(<32 x i8> %a) nounwind { ; CHECK-NEXT: vslidedown.vi v8, v8, 7 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v32i8: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; VISNI-NEXT: ri.vextract.x.v a0, v8, 7 +; VISNI-NEXT: ret %b = extractelement <32 x i8> %a, i32 7 ret i8 %b } @@ -129,6 +188,12 @@ define i16 @extractelt_v16i16(<16 x i16> %a) nounwind { ; CHECK-NEXT: vslidedown.vi v8, v8, 7 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v16i16: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; VISNI-NEXT: ri.vextract.x.v a0, v8, 7 +; VISNI-NEXT: ret %b = extractelement <16 x i16> %a, i32 7 ret i16 %b } @@ -140,6 +205,12 @@ define i32 @extractelt_v8i32(<8 x i32> %a) nounwind { ; CHECK-NEXT: vslidedown.vi v8, v8, 6 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v8i32: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; VISNI-NEXT: ri.vextract.x.v a0, v8, 6 +; VISNI-NEXT: ret %b = extractelement <8 x i32> %a, i32 6 ret i32 %b } @@ -161,6 +232,12 @@ define i64 @extractelt_v4i64(<4 x i64> %a) nounwind { ; RV64-NEXT: vslidedown.vi v8, v8, 3 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret +; +; VISNI-LABEL: extractelt_v4i64: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; VISNI-NEXT: ri.vextract.x.v a0, v8, 3 +; VISNI-NEXT: ret %b = extractelement <4 x i64> %a, i32 3 ret i64 %b } @@ -173,6 +250,13 @@ define bfloat @extractelt_v16bf16(<16 x bfloat> %a) nounwind { ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: fmv.h.x fa0, a0 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v16bf16: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; VISNI-NEXT: ri.vextract.x.v a0, v8, 7 +; VISNI-NEXT: fmv.h.x fa0, a0 +; VISNI-NEXT: ret %b = extractelement <16 x bfloat> %a, i32 7 ret bfloat %b } @@ -192,6 +276,13 @@ 
define half @extractelt_v16f16(<16 x half> %a) nounwind { ; ZVFHMIN-NEXT: vmv.x.s a0, v8 ; ZVFHMIN-NEXT: fmv.h.x fa0, a0 ; ZVFHMIN-NEXT: ret +; +; VISNI-LABEL: extractelt_v16f16: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; VISNI-NEXT: vslidedown.vi v8, v8, 7 +; VISNI-NEXT: vfmv.f.s fa0, v8 +; VISNI-NEXT: ret %b = extractelement <16 x half> %a, i32 7 ret half %b } @@ -203,6 +294,13 @@ define float @extractelt_v8f32(<8 x float> %a) nounwind { ; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v8f32: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; VISNI-NEXT: vslidedown.vi v8, v8, 2 +; VISNI-NEXT: vfmv.f.s fa0, v8 +; VISNI-NEXT: ret %b = extractelement <8 x float> %a, i32 2 ret float %b } @@ -213,6 +311,12 @@ define double @extractelt_v4f64(<4 x double> %a) nounwind { ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v4f64: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; VISNI-NEXT: vfmv.f.s fa0, v8 +; VISNI-NEXT: ret %b = extractelement <4 x double> %a, i32 0 ret double %b } @@ -237,6 +341,12 @@ define i64 @extractelt_v3i64(<3 x i64> %a) nounwind { ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret +; +; VISNI-LABEL: extractelt_v3i64: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; VISNI-NEXT: ri.vextract.x.v a0, v8, 2 +; VISNI-NEXT: ret %b = extractelement <3 x i64> %a, i32 2 ret i64 %b } @@ -278,6 +388,12 @@ define i32 @extractelt_v32i32(<32 x i32> %a) nounwind { ; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 256 ; RV64-NEXT: ret +; +; VISNI-LABEL: extractelt_v32i32: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e32, m8, ta, ma +; VISNI-NEXT: ri.vextract.x.v a0, v8, 31 +; VISNI-NEXT: ret %b = extractelement <32 x i32> %a, i32 31 ret i32 %b } @@ -319,6 +435,12 @@ define i32 @extractelt_v64i32(<64 x i32> %a) nounwind { ; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 256 ; RV64-NEXT: ret +; +; VISNI-LABEL: extractelt_v64i32: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e32, m8, ta, ma +; VISNI-NEXT: ri.vextract.x.v a0, v16, 31 +; VISNI-NEXT: ret %b = extractelement <64 x i32> %a, i32 63 ret i32 %b } @@ -330,6 +452,13 @@ define i8 @extractelt_v16i8_idx(<16 x i8> %a, i32 zeroext %idx) nounwind { ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v16i8_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; VISNI-NEXT: vslidedown.vx v8, v8, a0 +; VISNI-NEXT: vmv.x.s a0, v8 +; VISNI-NEXT: ret %b = extractelement <16 x i8> %a, i32 %idx ret i8 %b } @@ -341,6 +470,13 @@ define i16 @extractelt_v8i16_idx(<8 x i16> %a, i32 zeroext %idx) nounwind { ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v8i16_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; VISNI-NEXT: vslidedown.vx v8, v8, a0 +; VISNI-NEXT: vmv.x.s a0, v8 +; VISNI-NEXT: ret %b = extractelement <8 x i16> %a, i32 %idx ret i16 %b } @@ -353,6 +489,14 @@ define i32 @extractelt_v4i32_idx(<4 x i32> %a, i32 zeroext %idx) nounwind { ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v4i32_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; VISNI-NEXT: 
vadd.vv v8, v8, v8 +; VISNI-NEXT: vslidedown.vx v8, v8, a0 +; VISNI-NEXT: vmv.x.s a0, v8 +; VISNI-NEXT: ret %b = add <4 x i32> %a, %a %c = extractelement <4 x i32> %b, i32 %idx ret i32 %c @@ -378,6 +522,14 @@ define i64 @extractelt_v2i64_idx(<2 x i64> %a, i32 zeroext %idx) nounwind { ; RV64-NEXT: vslidedown.vx v8, v8, a0 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret +; +; VISNI-LABEL: extractelt_v2i64_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; VISNI-NEXT: vadd.vv v8, v8, v8 +; VISNI-NEXT: vslidedown.vx v8, v8, a0 +; VISNI-NEXT: vmv.x.s a0, v8 +; VISNI-NEXT: ret %b = add <2 x i64> %a, %a %c = extractelement <2 x i64> %b, i32 %idx ret i64 %c @@ -396,6 +548,19 @@ define bfloat @extractelt_v8bf16_idx(<8 x bfloat> %a, i32 zeroext %idx) nounwind ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: fmv.h.x fa0, a0 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v8bf16_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; VISNI-NEXT: vfwcvtbf16.f.f.v v10, v8 +; VISNI-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; VISNI-NEXT: vfadd.vv v8, v10, v10 +; VISNI-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; VISNI-NEXT: vfncvtbf16.f.f.w v10, v8 +; VISNI-NEXT: vslidedown.vx v8, v10, a0 +; VISNI-NEXT: vmv.x.s a0, v8 +; VISNI-NEXT: fmv.h.x fa0, a0 +; VISNI-NEXT: ret %b = fadd <8 x bfloat> %a, %a %c = extractelement <8 x bfloat> %b, i32 %idx ret bfloat %c @@ -422,6 +587,14 @@ define half @extractelt_v8f16_idx(<8 x half> %a, i32 zeroext %idx) nounwind { ; ZVFHMIN-NEXT: vmv.x.s a0, v8 ; ZVFHMIN-NEXT: fmv.h.x fa0, a0 ; ZVFHMIN-NEXT: ret +; +; VISNI-LABEL: extractelt_v8f16_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; VISNI-NEXT: vfadd.vv v8, v8, v8 +; VISNI-NEXT: vslidedown.vx v8, v8, a0 +; VISNI-NEXT: vfmv.f.s fa0, v8 +; VISNI-NEXT: ret %b = fadd <8 x half> %a, %a %c = extractelement <8 x half> %b, i32 %idx ret half %c @@ -435,6 +608,14 @@ define float @extractelt_v4f32_idx(<4 x float> %a, i32 zeroext %idx) nounwind { ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v4f32_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; VISNI-NEXT: vfadd.vv v8, v8, v8 +; VISNI-NEXT: vslidedown.vx v8, v8, a0 +; VISNI-NEXT: vfmv.f.s fa0, v8 +; VISNI-NEXT: ret %b = fadd <4 x float> %a, %a %c = extractelement <4 x float> %b, i32 %idx ret float %c @@ -448,6 +629,14 @@ define double @extractelt_v2f64_idx(<2 x double> %a, i32 zeroext %idx) nounwind ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v2f64_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; VISNI-NEXT: vfadd.vv v8, v8, v8 +; VISNI-NEXT: vslidedown.vx v8, v8, a0 +; VISNI-NEXT: vfmv.f.s fa0, v8 +; VISNI-NEXT: ret %b = fadd <2 x double> %a, %a %c = extractelement <2 x double> %b, i32 %idx ret double %c @@ -460,6 +649,13 @@ define i8 @extractelt_v32i8_idx(<32 x i8> %a, i32 zeroext %idx) nounwind { ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v32i8_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e8, m2, ta, ma +; VISNI-NEXT: vslidedown.vx v8, v8, a0 +; VISNI-NEXT: vmv.x.s a0, v8 +; VISNI-NEXT: ret %b = extractelement <32 x i8> %a, i32 %idx ret i8 %b } @@ -471,6 +667,13 @@ define i16 @extractelt_v16i16_idx(<16 x i16> %a, i32 zeroext %idx) nounwind { ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret +; +; 
VISNI-LABEL: extractelt_v16i16_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; VISNI-NEXT: vslidedown.vx v8, v8, a0 +; VISNI-NEXT: vmv.x.s a0, v8 +; VISNI-NEXT: ret %b = extractelement <16 x i16> %a, i32 %idx ret i16 %b } @@ -483,6 +686,14 @@ define i32 @extractelt_v8i32_idx(<8 x i32> %a, i32 zeroext %idx) nounwind { ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v8i32_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; VISNI-NEXT: vadd.vv v8, v8, v8 +; VISNI-NEXT: vslidedown.vx v8, v8, a0 +; VISNI-NEXT: vmv.x.s a0, v8 +; VISNI-NEXT: ret %b = add <8 x i32> %a, %a %c = extractelement <8 x i32> %b, i32 %idx ret i32 %c @@ -508,6 +719,14 @@ define i64 @extractelt_v4i64_idx(<4 x i64> %a, i32 zeroext %idx) nounwind { ; RV64-NEXT: vslidedown.vx v8, v8, a0 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret +; +; VISNI-LABEL: extractelt_v4i64_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; VISNI-NEXT: vadd.vv v8, v8, v8 +; VISNI-NEXT: vslidedown.vx v8, v8, a0 +; VISNI-NEXT: vmv.x.s a0, v8 +; VISNI-NEXT: ret %b = add <4 x i64> %a, %a %c = extractelement <4 x i64> %b, i32 %idx ret i64 %c @@ -526,6 +745,19 @@ define bfloat @extractelt_v16bf16_idx(<16 x bfloat> %a, i32 zeroext %idx) nounwi ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: fmv.h.x fa0, a0 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v16bf16_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; VISNI-NEXT: vfwcvtbf16.f.f.v v12, v8 +; VISNI-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; VISNI-NEXT: vfadd.vv v8, v12, v12 +; VISNI-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; VISNI-NEXT: vfncvtbf16.f.f.w v12, v8 +; VISNI-NEXT: vslidedown.vx v8, v12, a0 +; VISNI-NEXT: vmv.x.s a0, v8 +; VISNI-NEXT: fmv.h.x fa0, a0 +; VISNI-NEXT: ret %b = fadd <16 x bfloat> %a, %a %c = extractelement <16 x bfloat> %b, i32 %idx ret bfloat %c @@ -552,6 +784,14 @@ define half @extractelt_v16f16_idx(<16 x half> %a, i32 zeroext %idx) nounwind { ; ZVFHMIN-NEXT: vmv.x.s a0, v8 ; ZVFHMIN-NEXT: fmv.h.x fa0, a0 ; ZVFHMIN-NEXT: ret +; +; VISNI-LABEL: extractelt_v16f16_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; VISNI-NEXT: vfadd.vv v8, v8, v8 +; VISNI-NEXT: vslidedown.vx v8, v8, a0 +; VISNI-NEXT: vfmv.f.s fa0, v8 +; VISNI-NEXT: ret %b = fadd <16 x half> %a, %a %c = extractelement <16 x half> %b, i32 %idx ret half %c @@ -565,6 +805,14 @@ define float @extractelt_v8f32_idx(<8 x float> %a, i32 zeroext %idx) nounwind { ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v8f32_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; VISNI-NEXT: vfadd.vv v8, v8, v8 +; VISNI-NEXT: vslidedown.vx v8, v8, a0 +; VISNI-NEXT: vfmv.f.s fa0, v8 +; VISNI-NEXT: ret %b = fadd <8 x float> %a, %a %c = extractelement <8 x float> %b, i32 %idx ret float %c @@ -578,6 +826,14 @@ define double @extractelt_v4f64_idx(<4 x double> %a, i32 zeroext %idx) nounwind ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret +; +; VISNI-LABEL: extractelt_v4f64_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; VISNI-NEXT: vfadd.vv v8, v8, v8 +; VISNI-NEXT: vslidedown.vx v8, v8, a0 +; VISNI-NEXT: vfmv.f.s fa0, v8 +; VISNI-NEXT: ret %b = fadd <4 x double> %a, %a %c = extractelement <4 x double> %b, i32 %idx ret double %c @@ -608,6 +864,14 @@ define i64 @extractelt_v3i64_idx(<3 x 
i64> %a, i32 zeroext %idx) nounwind { ; RV64-NEXT: vslidedown.vx v8, v8, a0 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret +; +; VISNI-LABEL: extractelt_v3i64_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; VISNI-NEXT: vadd.vv v8, v8, v8 +; VISNI-NEXT: vslidedown.vx v8, v8, a0 +; VISNI-NEXT: vmv.x.s a0, v8 +; VISNI-NEXT: ret %b = add <3 x i64> %a, %a %c = extractelement <3 x i64> %b, i32 %idx ret i64 %c @@ -713,6 +977,29 @@ define i32 @extractelt_v32i32_idx(ptr %x, i32 zeroext %idx) nounwind { ; RV64M-NEXT: ld s0, 240(sp) # 8-byte Folded Reload ; RV64M-NEXT: addi sp, sp, 256 ; RV64M-NEXT: ret +; +; VISNI-LABEL: extractelt_v32i32_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: addi sp, sp, -256 +; VISNI-NEXT: sd ra, 248(sp) # 8-byte Folded Spill +; VISNI-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; VISNI-NEXT: addi s0, sp, 256 +; VISNI-NEXT: andi sp, sp, -128 +; VISNI-NEXT: andi a1, a1, 31 +; VISNI-NEXT: li a2, 32 +; VISNI-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; VISNI-NEXT: vle32.v v8, (a0) +; VISNI-NEXT: slli a1, a1, 2 +; VISNI-NEXT: mv a0, sp +; VISNI-NEXT: or a1, a0, a1 +; VISNI-NEXT: vadd.vv v8, v8, v8 +; VISNI-NEXT: vse32.v v8, (a0) +; VISNI-NEXT: lw a0, 0(a1) +; VISNI-NEXT: addi sp, s0, -256 +; VISNI-NEXT: ld ra, 248(sp) # 8-byte Folded Reload +; VISNI-NEXT: ld s0, 240(sp) # 8-byte Folded Reload +; VISNI-NEXT: addi sp, sp, 256 +; VISNI-NEXT: ret %a = load <32 x i32>, ptr %x %b = add <32 x i32> %a, %a %c = extractelement <32 x i32> %b, i32 %idx @@ -769,6 +1056,31 @@ define i32 @extractelt_v64i32_idx(<64 x i32> %a, i32 zeroext %idx) nounwind { ; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 384 ; RV64-NEXT: ret +; +; VISNI-LABEL: extractelt_v64i32_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: addi sp, sp, -384 +; VISNI-NEXT: sd ra, 376(sp) # 8-byte Folded Spill +; VISNI-NEXT: sd s0, 368(sp) # 8-byte Folded Spill +; VISNI-NEXT: addi s0, sp, 384 +; VISNI-NEXT: andi sp, sp, -128 +; VISNI-NEXT: andi a0, a0, 63 +; VISNI-NEXT: mv a1, sp +; VISNI-NEXT: li a2, 32 +; VISNI-NEXT: addi a3, sp, 128 +; VISNI-NEXT: slli a0, a0, 2 +; VISNI-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; VISNI-NEXT: vadd.vv v8, v8, v8 +; VISNI-NEXT: vadd.vv v16, v16, v16 +; VISNI-NEXT: add a0, a1, a0 +; VISNI-NEXT: vse32.v v16, (a3) +; VISNI-NEXT: vse32.v v8, (a1) +; VISNI-NEXT: lw a0, 0(a0) +; VISNI-NEXT: addi sp, s0, -384 +; VISNI-NEXT: ld ra, 376(sp) # 8-byte Folded Reload +; VISNI-NEXT: ld s0, 368(sp) # 8-byte Folded Reload +; VISNI-NEXT: addi sp, sp, 384 +; VISNI-NEXT: ret %b = add <64 x i32> %a, %a %c = extractelement <64 x i32> %b, i32 %idx ret i32 %c @@ -781,6 +1093,13 @@ define void @store_extractelt_v16i8(<16 x i8> %a, ptr %p) nounwind { ; CHECK-NEXT: vslidedown.vi v8, v8, 7 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret +; +; VISNI-LABEL: store_extractelt_v16i8: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; VISNI-NEXT: ri.vextract.x.v a1, v8, 7 +; VISNI-NEXT: sb a1, 0(a0) +; VISNI-NEXT: ret %b = extractelement <16 x i8> %a, i32 7 store i8 %b, ptr %p ret void @@ -793,6 +1112,13 @@ define void @store_extractelt_v8i16(<8 x i16> %a, ptr %p) nounwind { ; CHECK-NEXT: vslidedown.vi v8, v8, 7 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret +; +; VISNI-LABEL: store_extractelt_v8i16: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; VISNI-NEXT: ri.vextract.x.v a1, v8, 7 +; VISNI-NEXT: sh a1, 0(a0) +; VISNI-NEXT: ret %b = extractelement <8 x i16> %a, i32 7 store i16 %b, ptr %p ret void @@ -805,6 +1131,13 @@ define void 
@store_extractelt_v4i32(<4 x i32> %a, ptr %p) nounwind {
 ; CHECK-NEXT: vslidedown.vi v8, v8, 2
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
+;
+; VISNI-LABEL: store_extractelt_v4i32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a1, v8, 2
+; VISNI-NEXT: sw a1, 0(a0)
+; VISNI-NEXT: ret
   %b = extractelement <4 x i32> %a, i32 2
   store i32 %b, ptr %p
   ret void
@@ -830,6 +1163,13 @@ define void @store_extractelt_v2i64(<2 x i64> %a, ptr %p) nounwind {
 ; RV64-NEXT: vslidedown.vi v8, v8, 1
 ; RV64-NEXT: vse64.v v8, (a0)
 ; RV64-NEXT: ret
+;
+; VISNI-LABEL: store_extractelt_v2i64:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a1, v8, 1
+; VISNI-NEXT: sd a1, 0(a0)
+; VISNI-NEXT: ret
   %b = extractelement <2 x i64> %a, i64 1
   store i64 %b, ptr %p
   ret void
@@ -842,6 +1182,13 @@ define void @store_extractelt_v2f64(<2 x double> %a, ptr %p) nounwind {
 ; CHECK-NEXT: vslidedown.vi v8, v8, 1
 ; CHECK-NEXT: vse64.v v8, (a0)
 ; CHECK-NEXT: ret
+;
+; VISNI-LABEL: store_extractelt_v2f64:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; VISNI-NEXT: vslidedown.vi v8, v8, 1
+; VISNI-NEXT: vse64.v v8, (a0)
+; VISNI-NEXT: ret
   %b = extractelement <2 x double> %a, i64 1
   store double %b, ptr %p
   ret void
@@ -863,6 +1210,13 @@ define i32 @extractelt_add_v4i32(<4 x i32> %x) {
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: addiw a0, a0, 13
 ; RV64-NEXT: ret
+;
+; VISNI-LABEL: extractelt_add_v4i32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 2
+; VISNI-NEXT: addiw a0, a0, 13
+; VISNI-NEXT: ret
   %bo = add <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
   %ext = extractelement <4 x i32> %bo, i32 2
   ret i32 %ext
@@ -886,6 +1240,14 @@ define i32 @extractelt_sub_v4i32(<4 x i32> %x) {
 ; RV64-NEXT: li a1, 13
 ; RV64-NEXT: subw a0, a1, a0
 ; RV64-NEXT: ret
+;
+; VISNI-LABEL: extractelt_sub_v4i32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 2
+; VISNI-NEXT: li a1, 13
+; VISNI-NEXT: subw a0, a1, a0
+; VISNI-NEXT: ret
   %bo = sub <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %x
   %ext = extractelement <4 x i32> %bo, i32 2
   ret i32 %ext
@@ -927,6 +1289,14 @@ define i32 @extractelt_mul_v4i32(<4 x i32> %x) {
 ; RV64M-NEXT: li a1, 13
 ; RV64M-NEXT: mulw a0, a0, a1
 ; RV64M-NEXT: ret
+;
+; VISNI-LABEL: extractelt_mul_v4i32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 2
+; VISNI-NEXT: li a1, 13
+; VISNI-NEXT: mulw a0, a0, a1
+; VISNI-NEXT: ret
   %bo = mul <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
   %ext = extractelement <4 x i32> %bo, i32 2
   ret i32 %ext
@@ -1004,6 +1374,19 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
 ; RV64M-NEXT: srai a0, a0, 34
 ; RV64M-NEXT: add a0, a0, a1
 ; RV64M-NEXT: ret
+;
+; VISNI-LABEL: extractelt_sdiv_v4i32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 2
+; VISNI-NEXT: lui a1, 322639
+; VISNI-NEXT: sext.w a0, a0
+; VISNI-NEXT: addiw a1, a1, -945
+; VISNI-NEXT: mul a0, a0, a1
+; VISNI-NEXT: srli a1, a0, 63
+; VISNI-NEXT: srai a0, a0, 34
+; VISNI-NEXT: add a0, a0, a1
+; VISNI-NEXT: ret
   %bo = sdiv <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
   %ext = extractelement <4 x i32> %bo, i32 2
   ret i32 %ext
@@ -1058,6 +1441,18 @@ define i32 @extractelt_udiv_v4i32(<4 x i32> %x) {
 ; RV64M-NEXT: mulhu a0, a1, a0
 ; RV64M-NEXT: srli a0, a0, 34
 ; RV64M-NEXT: ret
+;
+; VISNI-LABEL: extractelt_udiv_v4i32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: lui a0, 322639
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a1, v8, 2
+; VISNI-NEXT: addi a0, a0, -945
+; VISNI-NEXT: slli a0, a0, 32
+; VISNI-NEXT: slli a1, a1, 32
+; VISNI-NEXT: mulhu a0, a1, a0
+; VISNI-NEXT: srli a0, a0, 34
+; VISNI-NEXT: ret
   %bo = udiv <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
   %ext = extractelement <4 x i32> %bo, i32 2
   ret i32 %ext
@@ -1073,6 +1468,16 @@ define float @extractelt_fadd_v4f32(<4 x float> %x) {
 ; CHECK-NEXT: fmv.w.x fa4, a0
 ; CHECK-NEXT: fadd.s fa0, fa5, fa4
 ; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_fadd_v4f32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: vslidedown.vi v8, v8, 2
+; VISNI-NEXT: lui a0, 267520
+; VISNI-NEXT: vfmv.f.s fa5, v8
+; VISNI-NEXT: fmv.w.x fa4, a0
+; VISNI-NEXT: fadd.s fa0, fa5, fa4
+; VISNI-NEXT: ret
   %bo = fadd <4 x float> %x, <float 11.0, float 12.0, float 13.0, float 14.0>
   %ext = extractelement <4 x float> %bo, i32 2
   ret float %ext
@@ -1088,6 +1493,16 @@ define float @extractelt_fsub_v4f32(<4 x float> %x) {
 ; CHECK-NEXT: fmv.w.x fa4, a0
 ; CHECK-NEXT: fsub.s fa0, fa4, fa5
 ; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_fsub_v4f32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: vslidedown.vi v8, v8, 2
+; VISNI-NEXT: lui a0, 267520
+; VISNI-NEXT: vfmv.f.s fa5, v8
+; VISNI-NEXT: fmv.w.x fa4, a0
+; VISNI-NEXT: fsub.s fa0, fa4, fa5
+; VISNI-NEXT: ret
   %bo = fsub <4 x float> <float 11.0, float 12.0, float 13.0, float 14.0>, %x
   %ext = extractelement <4 x float> %bo, i32 2
   ret float %ext
@@ -1103,6 +1518,16 @@ define float @extractelt_fmul_v4f32(<4 x float> %x) {
 ; CHECK-NEXT: fmv.w.x fa4, a0
 ; CHECK-NEXT: fmul.s fa0, fa5, fa4
 ; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_fmul_v4f32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: vslidedown.vi v8, v8, 2
+; VISNI-NEXT: lui a0, 267520
+; VISNI-NEXT: vfmv.f.s fa5, v8
+; VISNI-NEXT: fmv.w.x fa4, a0
+; VISNI-NEXT: fmul.s fa0, fa5, fa4
+; VISNI-NEXT: ret
   %bo = fmul <4 x float> %x, <float 11.0, float 12.0, float 13.0, float 14.0>
   %ext = extractelement <4 x float> %bo, i32 2
   ret float %ext
@@ -1118,6 +1543,16 @@ define float @extractelt_fdiv_v4f32(<4 x float> %x) {
 ; CHECK-NEXT: fmv.w.x fa4, a0
 ; CHECK-NEXT: fdiv.s fa0, fa5, fa4
 ; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_fdiv_v4f32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: vslidedown.vi v8, v8, 2
+; VISNI-NEXT: lui a0, 267520
+; VISNI-NEXT: vfmv.f.s fa5, v8
+; VISNI-NEXT: fmv.w.x fa4, a0
+; VISNI-NEXT: fdiv.s fa0, fa5, fa4
+; VISNI-NEXT: ret
   %bo = fdiv <4 x float> %x, <float 11.0, float 12.0, float 13.0, float 14.0>
   %ext = extractelement <4 x float> %bo, i32 2
   ret float %ext
@@ -1130,6 +1565,12 @@ define i32 @extractelt_v16i32_idx7_exact_vlen(<16 x i32> %a) nounwind vscale_ran
 ; CHECK-NEXT: vslidedown.vi v8, v9, 3
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v16i32_idx7_exact_vlen:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v9, 3
+; VISNI-NEXT: ret
   %b = extractelement <16 x i32> %a, i32 7
   ret i32 %b
 }
@@ -1141,6 +1582,12 @@ define i32 @extractelt_v16i32_idx15_exact_vlen(<16 x i32> %a) nounwind vscale_ra
 ; CHECK-NEXT: vslidedown.vi v8, v11, 3
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v16i32_idx15_exact_vlen:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v11, 3
+; VISNI-NEXT: ret
   %b = extractelement <16 x i32> %a, i32 15
   ret i32 %b
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index 6782b2003ba94..3a5b3719931a9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -3,6 +3,8 @@ ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVFH ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVFHMIN,ZVFHMINRV32 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVFHMIN,ZVFHMINRV64 +; +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+experimental-xrivosvisni -verify-machineinstrs < %s | FileCheck %s --check-prefixes=VISNI define <4 x i32> @insertelt_v4i32_0(<4 x i32> %a, i32 %y) { ; CHECK-LABEL: insertelt_v4i32_0: @@ -10,6 +12,12 @@ define <4 x i32> @insertelt_v4i32_0(<4 x i32> %a, i32 %y) { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_v4i32_0: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; VISNI-NEXT: vmv.s.x v8, a0 +; VISNI-NEXT: ret %b = insertelement <4 x i32> %a, i32 %y, i32 0 ret <4 x i32> %b } @@ -21,6 +29,12 @@ define <4 x i32> @insertelt_v4i32_3(<4 x i32> %a, i32 %y) { ; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_v4i32_3: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; VISNI-NEXT: ri.vinsert.v.x v8, a0, 3 +; VISNI-NEXT: ret %b = insertelement <4 x i32> %a, i32 %y, i32 3 ret <4 x i32> %b } @@ -34,6 +48,15 @@ define <4 x i32> @insertelt_v4i32_idx(<4 x i32> %a, i32 %y, i32 zeroext %idx) { ; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_v4i32_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: addi a2, a1, 1 +; VISNI-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; VISNI-NEXT: vmv.s.x v9, a0 +; VISNI-NEXT: vsetvli zero, a2, e32, m1, tu, ma +; VISNI-NEXT: vslideup.vx v8, v9, a1 +; VISNI-NEXT: ret %b = insertelement <4 x i32> %a, i32 %y, i32 %idx ret <4 x i32> %b } @@ -45,6 +68,13 @@ define <32 x i32> @insertelt_v32i32_0(<32 x i32> %a, i32 %y) { ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_v32i32_0: +; VISNI: # %bb.0: +; VISNI-NEXT: li a1, 32 +; VISNI-NEXT: vsetvli zero, a1, e32, m1, tu, ma +; VISNI-NEXT: vmv.s.x v8, a0 +; VISNI-NEXT: ret %b = insertelement <32 x i32> %a, i32 %y, i32 0 ret <32 x i32> %b } @@ -56,6 +86,13 @@ define <32 x i32> @insertelt_v32i32_4(<32 x i32> %a, i32 %y) { ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 4 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_v32i32_4: +; VISNI: # %bb.0: +; VISNI-NEXT: li a1, 32 +; VISNI-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; VISNI-NEXT: ri.vinsert.v.x v8, a0, 4 +; VISNI-NEXT: ret %b = insertelement <32 x i32> %a, i32 %y, i32 4 ret <32 x i32> %b } @@ -68,6 +105,13 @@ define <32 x i32> @insertelt_v32i32_31(<32 x i32> %a, i32 %y) { ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 31 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_v32i32_31: +; VISNI: # %bb.0: +; VISNI-NEXT: li a1, 32 +; VISNI-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; VISNI-NEXT: ri.vinsert.v.x v8, a0, 31 +; VISNI-NEXT: ret %b = insertelement <32 x i32> %a, i32 %y, i32 31 ret <32 x i32> %b } @@ -82,6 +126,16 @@ define <32 x i32> 
@insertelt_v32i32_idx(<32 x i32> %a, i32 %y, i32 zeroext %idx) ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma ; CHECK-NEXT: vslideup.vx v8, v16, a1 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_v32i32_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: li a2, 32 +; VISNI-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; VISNI-NEXT: vmv.s.x v16, a0 +; VISNI-NEXT: addi a0, a1, 1 +; VISNI-NEXT: vsetvli zero, a0, e32, m8, tu, ma +; VISNI-NEXT: vslideup.vx v8, v16, a1 +; VISNI-NEXT: ret %b = insertelement <32 x i32> %a, i32 %y, i32 %idx ret <32 x i32> %b } @@ -93,6 +147,13 @@ define <64 x i32> @insertelt_v64i32_0(<64 x i32> %a, i32 %y) { ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_v64i32_0: +; VISNI: # %bb.0: +; VISNI-NEXT: li a1, 32 +; VISNI-NEXT: vsetvli zero, a1, e32, m1, tu, ma +; VISNI-NEXT: vmv.s.x v8, a0 +; VISNI-NEXT: ret %b = insertelement <64 x i32> %a, i32 %y, i32 0 ret <64 x i32> %b } @@ -105,6 +166,13 @@ define <64 x i32> @insertelt_v64i32_63(<64 x i32> %a, i32 %y) { ; CHECK-NEXT: vmv.s.x v24, a0 ; CHECK-NEXT: vslideup.vi v16, v24, 31 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_v64i32_63: +; VISNI: # %bb.0: +; VISNI-NEXT: li a1, 32 +; VISNI-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; VISNI-NEXT: ri.vinsert.v.x v16, a0, 31 +; VISNI-NEXT: ret %b = insertelement <64 x i32> %a, i32 %y, i32 63 ret <64 x i32> %b } @@ -175,6 +243,39 @@ define <64 x i32> @insertelt_v64i32_idx(<64 x i32> %a, i32 %y, i32 zeroext %idx) ; RV64-NEXT: addi sp, sp, 384 ; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret +; +; VISNI-LABEL: insertelt_v64i32_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: addi sp, sp, -384 +; VISNI-NEXT: .cfi_def_cfa_offset 384 +; VISNI-NEXT: sd ra, 376(sp) # 8-byte Folded Spill +; VISNI-NEXT: sd s0, 368(sp) # 8-byte Folded Spill +; VISNI-NEXT: .cfi_offset ra, -8 +; VISNI-NEXT: .cfi_offset s0, -16 +; VISNI-NEXT: addi s0, sp, 384 +; VISNI-NEXT: .cfi_def_cfa s0, 0 +; VISNI-NEXT: andi sp, sp, -128 +; VISNI-NEXT: andi a1, a1, 63 +; VISNI-NEXT: mv a2, sp +; VISNI-NEXT: addi a3, sp, 128 +; VISNI-NEXT: li a4, 32 +; VISNI-NEXT: slli a1, a1, 2 +; VISNI-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; VISNI-NEXT: vse32.v v16, (a3) +; VISNI-NEXT: vse32.v v8, (a2) +; VISNI-NEXT: add a1, a2, a1 +; VISNI-NEXT: sw a0, 0(a1) +; VISNI-NEXT: vle32.v v8, (a2) +; VISNI-NEXT: vle32.v v16, (a3) +; VISNI-NEXT: addi sp, s0, -384 +; VISNI-NEXT: .cfi_def_cfa sp, 384 +; VISNI-NEXT: ld ra, 376(sp) # 8-byte Folded Reload +; VISNI-NEXT: ld s0, 368(sp) # 8-byte Folded Reload +; VISNI-NEXT: .cfi_restore ra +; VISNI-NEXT: .cfi_restore s0 +; VISNI-NEXT: addi sp, sp, 384 +; VISNI-NEXT: .cfi_def_cfa_offset 0 +; VISNI-NEXT: ret %b = insertelement <64 x i32> %a, i32 %y, i32 %idx ret <64 x i32> %b } @@ -198,6 +299,12 @@ define <4 x i64> @insertelt_v4i64(<4 x i64> %a, i64 %y) { ; RV64-NEXT: vmv.s.x v10, a0 ; RV64-NEXT: vslideup.vi v8, v10, 3 ; RV64-NEXT: ret +; +; VISNI-LABEL: insertelt_v4i64: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; VISNI-NEXT: ri.vinsert.v.x v8, a0, 3 +; VISNI-NEXT: ret %b = insertelement <4 x i64> %a, i64 %y, i32 3 ret <4 x i64> %b } @@ -213,6 +320,11 @@ define void @insertelt_v4i64_store(ptr %x, i64 %y) { ; RV64: # %bb.0: ; RV64-NEXT: sd a1, 24(a0) ; RV64-NEXT: ret +; +; VISNI-LABEL: insertelt_v4i64_store: +; VISNI: # %bb.0: +; VISNI-NEXT: sd a1, 24(a0) +; VISNI-NEXT: ret %a = load <4 x i64>, ptr %x %b = insertelement <4 x i64> %a, i64 %y, i32 3 store <4 x i64> %b, ptr %x @@ -256,6 +368,18 @@ define <3 x i64> 
@insertelt_v3i64(<3 x i64> %a, i64 %y) { ; RV64-NEXT: vslide1down.vx v8, v8, a0 ; RV64-NEXT: vslidedown.vi v8, v8, 1 ; RV64-NEXT: ret +; +; VISNI-LABEL: insertelt_v3i64: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; VISNI-NEXT: ri.vextract.x.v a1, v8, 1 +; VISNI-NEXT: vmv.x.s a2, v8 +; VISNI-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; VISNI-NEXT: vmv.v.x v8, a2 +; VISNI-NEXT: vslide1down.vx v8, v8, a1 +; VISNI-NEXT: vslide1down.vx v8, v8, a0 +; VISNI-NEXT: vslidedown.vi v8, v8, 1 +; VISNI-NEXT: ret %b = insertelement <3 x i64> %a, i64 %y, i32 2 ret <3 x i64> %b } @@ -271,6 +395,11 @@ define void @insertelt_v3i64_store(ptr %x, i64 %y) { ; RV64: # %bb.0: ; RV64-NEXT: sd a1, 16(a0) ; RV64-NEXT: ret +; +; VISNI-LABEL: insertelt_v3i64_store: +; VISNI: # %bb.0: +; VISNI-NEXT: sd a1, 16(a0) +; VISNI-NEXT: ret %a = load <3 x i64>, ptr %x, align 8 %b = insertelement <3 x i64> %a, i64 %y, i32 2 store <3 x i64> %b, ptr %x @@ -284,6 +413,12 @@ define <16 x i8> @insertelt_v16i8(<16 x i8> %a, i8 %y) { ; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 14 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_v16i8: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; VISNI-NEXT: ri.vinsert.v.x v8, a0, 14 +; VISNI-NEXT: ret %b = insertelement <16 x i8> %a, i8 %y, i32 14 ret <16 x i8> %b } @@ -293,6 +428,11 @@ define void @insertelt_v16i8_store(ptr %x, i8 %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: sb a1, 14(a0) ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_v16i8_store: +; VISNI: # %bb.0: +; VISNI-NEXT: sb a1, 14(a0) +; VISNI-NEXT: ret %a = load <16 x i8>, ptr %x %b = insertelement <16 x i8> %a, i8 %y, i32 14 store <16 x i8> %b, ptr %x @@ -321,6 +461,18 @@ define <32 x i16> @insertelt_v32i16(<32 x i16> %a, i16 %y, i32 %idx) { ; RV64-NEXT: vsetvli zero, a0, e16, m4, tu, ma ; RV64-NEXT: vslideup.vx v8, v12, a1 ; RV64-NEXT: ret +; +; VISNI-LABEL: insertelt_v32i16: +; VISNI: # %bb.0: +; VISNI-NEXT: li a2, 32 +; VISNI-NEXT: slli a1, a1, 32 +; VISNI-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; VISNI-NEXT: vmv.s.x v12, a0 +; VISNI-NEXT: srli a1, a1, 32 +; VISNI-NEXT: addi a0, a1, 1 +; VISNI-NEXT: vsetvli zero, a0, e16, m4, tu, ma +; VISNI-NEXT: vslideup.vx v8, v12, a1 +; VISNI-NEXT: ret %b = insertelement <32 x i16> %a, i16 %y, i32 %idx ret <32 x i16> %b } @@ -333,6 +485,14 @@ define void @insertelt_v32i16_store(ptr %x, i16 %y, i32 %idx) { ; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: sh a1, 0(a0) ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_v32i16_store: +; VISNI: # %bb.0: +; VISNI-NEXT: andi a2, a2, 31 +; VISNI-NEXT: slli a2, a2, 1 +; VISNI-NEXT: add a0, a0, a2 +; VISNI-NEXT: sh a1, 0(a0) +; VISNI-NEXT: ret %a = load <32 x i16>, ptr %x %b = insertelement <32 x i16> %a, i16 %y, i32 %idx store <32 x i16> %b, ptr %x @@ -359,6 +519,17 @@ define <8 x float> @insertelt_v8f32(<8 x float> %a, float %y, i32 %idx) { ; RV64-NEXT: vsetvli zero, a1, e32, m2, tu, ma ; RV64-NEXT: vslideup.vx v8, v10, a0 ; RV64-NEXT: ret +; +; VISNI-LABEL: insertelt_v8f32: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; VISNI-NEXT: vfmv.s.f v10, fa0 +; VISNI-NEXT: slli a0, a0, 32 +; VISNI-NEXT: srli a0, a0, 32 +; VISNI-NEXT: addi a1, a0, 1 +; VISNI-NEXT: vsetvli zero, a1, e32, m2, tu, ma +; VISNI-NEXT: vslideup.vx v8, v10, a0 +; VISNI-NEXT: ret %b = insertelement <8 x float> %a, float %y, i32 %idx ret <8 x float> %b } @@ -371,6 +542,14 @@ define void @insertelt_v8f32_store(ptr %x, float %y, i32 %idx) { ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: fsw fa0, 0(a0) ; CHECK-NEXT: ret +; +; 
VISNI-LABEL: insertelt_v8f32_store: +; VISNI: # %bb.0: +; VISNI-NEXT: andi a1, a1, 7 +; VISNI-NEXT: slli a1, a1, 2 +; VISNI-NEXT: add a0, a0, a1 +; VISNI-NEXT: fsw fa0, 0(a0) +; VISNI-NEXT: ret %a = load <8 x float>, ptr %x %b = insertelement <8 x float> %a, float %y, i32 %idx store <8 x float> %b, ptr %x @@ -384,6 +563,13 @@ define <8 x i64> @insertelt_v8i64_0(<8 x i64> %a, ptr %x) { ; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_v8i64_0: +; VISNI: # %bb.0: +; VISNI-NEXT: li a0, -1 +; VISNI-NEXT: vsetivli zero, 8, e64, m1, tu, ma +; VISNI-NEXT: vmv.s.x v8, a0 +; VISNI-NEXT: ret %b = insertelement <8 x i64> %a, i64 -1, i32 0 ret <8 x i64> %b } @@ -401,6 +587,12 @@ define void @insertelt_v8i64_0_store(ptr %x) { ; RV64-NEXT: li a1, -1 ; RV64-NEXT: sd a1, 0(a0) ; RV64-NEXT: ret +; +; VISNI-LABEL: insertelt_v8i64_0_store: +; VISNI: # %bb.0: +; VISNI-NEXT: li a1, -1 +; VISNI-NEXT: sd a1, 0(a0) +; VISNI-NEXT: ret %a = load <8 x i64>, ptr %x %b = insertelement <8 x i64> %a, i64 -1, i32 0 store <8 x i64> %b, ptr %x @@ -427,6 +619,17 @@ define <8 x i64> @insertelt_v8i64(<8 x i64> %a, i32 %idx) { ; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma ; RV64-NEXT: vslideup.vx v8, v12, a0 ; RV64-NEXT: ret +; +; VISNI-LABEL: insertelt_v8i64: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 8, e64, m1, ta, ma +; VISNI-NEXT: vmv.v.i v12, -1 +; VISNI-NEXT: slli a0, a0, 32 +; VISNI-NEXT: srli a0, a0, 32 +; VISNI-NEXT: addi a1, a0, 1 +; VISNI-NEXT: vsetvli zero, a1, e64, m4, tu, ma +; VISNI-NEXT: vslideup.vx v8, v12, a0 +; VISNI-NEXT: ret %b = insertelement <8 x i64> %a, i64 -1, i32 %idx ret <8 x i64> %b } @@ -450,6 +653,15 @@ define void @insertelt_v8i64_store(ptr %x, i32 %idx) { ; RV64-NEXT: li a1, -1 ; RV64-NEXT: sd a1, 0(a0) ; RV64-NEXT: ret +; +; VISNI-LABEL: insertelt_v8i64_store: +; VISNI: # %bb.0: +; VISNI-NEXT: andi a1, a1, 7 +; VISNI-NEXT: slli a1, a1, 3 +; VISNI-NEXT: add a0, a0, a1 +; VISNI-NEXT: li a1, -1 +; VISNI-NEXT: sd a1, 0(a0) +; VISNI-NEXT: ret %a = load <8 x i64>, ptr %x %b = insertelement <8 x i64> %a, i64 -1, i32 %idx store <8 x i64> %b, ptr %x @@ -463,6 +675,13 @@ define <8 x i64> @insertelt_c6_v8i64_0(<8 x i64> %a, ptr %x) { ; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_c6_v8i64_0: +; VISNI: # %bb.0: +; VISNI-NEXT: li a0, 6 +; VISNI-NEXT: vsetivli zero, 8, e64, m1, tu, ma +; VISNI-NEXT: vmv.s.x v8, a0 +; VISNI-NEXT: ret %b = insertelement <8 x i64> %a, i64 6, i32 0 ret <8 x i64> %b } @@ -480,6 +699,12 @@ define void @insertelt_c6_v8i64_0_store(ptr %x) { ; RV64-NEXT: li a1, 6 ; RV64-NEXT: sd a1, 0(a0) ; RV64-NEXT: ret +; +; VISNI-LABEL: insertelt_c6_v8i64_0_store: +; VISNI: # %bb.0: +; VISNI-NEXT: li a1, 6 +; VISNI-NEXT: sd a1, 0(a0) +; VISNI-NEXT: ret %a = load <8 x i64>, ptr %x %b = insertelement <8 x i64> %a, i64 6, i32 0 store <8 x i64> %b, ptr %x @@ -506,6 +731,17 @@ define <8 x i64> @insertelt_c6_v8i64(<8 x i64> %a, i32 %idx) { ; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma ; RV64-NEXT: vslideup.vx v8, v12, a0 ; RV64-NEXT: ret +; +; VISNI-LABEL: insertelt_c6_v8i64: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 8, e64, m1, ta, ma +; VISNI-NEXT: vmv.v.i v12, 6 +; VISNI-NEXT: slli a0, a0, 32 +; VISNI-NEXT: srli a0, a0, 32 +; VISNI-NEXT: addi a1, a0, 1 +; VISNI-NEXT: vsetvli zero, a1, e64, m4, tu, ma +; VISNI-NEXT: vslideup.vx v8, v12, a0 +; VISNI-NEXT: ret %b = insertelement <8 x i64> %a, i64 6, i32 %idx ret <8 x i64> %b } @@ -529,6 
+765,15 @@ define void @insertelt_c6_v8i64_store(ptr %x, i32 %idx) { ; RV64-NEXT: li a1, 6 ; RV64-NEXT: sd a1, 0(a0) ; RV64-NEXT: ret +; +; VISNI-LABEL: insertelt_c6_v8i64_store: +; VISNI: # %bb.0: +; VISNI-NEXT: andi a1, a1, 7 +; VISNI-NEXT: slli a1, a1, 3 +; VISNI-NEXT: add a0, a0, a1 +; VISNI-NEXT: li a1, 6 +; VISNI-NEXT: sd a1, 0(a0) +; VISNI-NEXT: ret %a = load <8 x i64>, ptr %x %b = insertelement <8 x i64> %a, i64 6, i32 %idx store <8 x i64> %b, ptr %x @@ -550,6 +795,19 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) { ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_c6_v8i64_0_add: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; VISNI-NEXT: vle64.v v8, (a0) +; VISNI-NEXT: vle64.v v12, (a1) +; VISNI-NEXT: li a1, 6 +; VISNI-NEXT: vsetvli zero, zero, e64, m4, tu, ma +; VISNI-NEXT: vmv.s.x v8, a1 +; VISNI-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; VISNI-NEXT: vadd.vv v8, v8, v12 +; VISNI-NEXT: vse64.v v8, (a0) +; VISNI-NEXT: ret %a = load <8 x i64>, ptr %x %b = insertelement <8 x i64> %a, i64 6, i32 0 %c = load <8 x i64>, ptr %y @@ -567,6 +825,12 @@ define <16 x i32> @insertelt_c0_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_ra ; CHECK-NEXT: vsetivli zero, 16, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_c0_v16xi32_exact: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 16, e32, m1, tu, ma +; VISNI-NEXT: vmv.s.x v8, a0 +; VISNI-NEXT: ret %v = insertelement <16 x i32> %vin, i32 %a, i32 0 ret <16 x i32> %v } @@ -578,6 +842,12 @@ define <16 x i32> @insertelt_c1_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_ra ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 1 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_c1_v16xi32_exact: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 16, e32, m1, ta, ma +; VISNI-NEXT: ri.vinsert.v.x v8, a0, 1 +; VISNI-NEXT: ret %v = insertelement <16 x i32> %vin, i32 %a, i32 1 ret <16 x i32> %v } @@ -589,6 +859,12 @@ define <16 x i32> @insertelt_c2_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_ra ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 2 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_c2_v16xi32_exact: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 16, e32, m1, ta, ma +; VISNI-NEXT: ri.vinsert.v.x v8, a0, 2 +; VISNI-NEXT: ret %v = insertelement <16 x i32> %vin, i32 %a, i32 2 ret <16 x i32> %v } @@ -600,6 +876,12 @@ define <16 x i32> @insertelt_c3_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_ra ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_c3_v16xi32_exact: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 16, e32, m1, ta, ma +; VISNI-NEXT: ri.vinsert.v.x v8, a0, 3 +; VISNI-NEXT: ret %v = insertelement <16 x i32> %vin, i32 %a, i32 3 ret <16 x i32> %v } @@ -610,6 +892,12 @@ define <16 x i32> @insertelt_c12_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_r ; CHECK-NEXT: vsetivli zero, 16, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v11, a0 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_c12_v16xi32_exact: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 16, e32, m1, tu, ma +; VISNI-NEXT: vmv.s.x v11, a0 +; VISNI-NEXT: ret %v = insertelement <16 x i32> %vin, i32 %a, i32 12 ret <16 x i32> %v } @@ -621,6 +909,12 @@ define <16 x i32> @insertelt_c13_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_r ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v11, v12, 1 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_c13_v16xi32_exact: +; 
VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 16, e32, m1, ta, ma +; VISNI-NEXT: ri.vinsert.v.x v11, a0, 1 +; VISNI-NEXT: ret %v = insertelement <16 x i32> %vin, i32 %a, i32 13 ret <16 x i32> %v } @@ -632,6 +926,12 @@ define <16 x i32> @insertelt_c14_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_r ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v11, v12, 2 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_c14_v16xi32_exact: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 16, e32, m1, ta, ma +; VISNI-NEXT: ri.vinsert.v.x v11, a0, 2 +; VISNI-NEXT: ret %v = insertelement <16 x i32> %vin, i32 %a, i32 14 ret <16 x i32> %v } @@ -643,6 +943,12 @@ define <16 x i32> @insertelt_c15_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_r ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v11, v12, 3 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_c15_v16xi32_exact: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 16, e32, m1, ta, ma +; VISNI-NEXT: ri.vinsert.v.x v11, a0, 3 +; VISNI-NEXT: ret %v = insertelement <16 x i32> %vin, i32 %a, i32 15 ret <16 x i32> %v } @@ -660,6 +966,12 @@ define <8 x i64> @insertelt_c4_v8xi64_exact(<8 x i64> %vin, i64 %a) vscale_range ; RV64-NEXT: vsetivli zero, 8, e64, m1, tu, ma ; RV64-NEXT: vmv.s.x v10, a0 ; RV64-NEXT: ret +; +; VISNI-LABEL: insertelt_c4_v8xi64_exact: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 8, e64, m1, tu, ma +; VISNI-NEXT: vmv.s.x v10, a0 +; VISNI-NEXT: ret %v = insertelement <8 x i64> %vin, i64 %a, i32 4 ret <8 x i64> %v } @@ -680,6 +992,12 @@ define <8 x i64> @insertelt_c5_v8xi64_exact(<8 x i64> %vin, i64 %a) vscale_range ; RV64-NEXT: vmv.s.x v12, a0 ; RV64-NEXT: vslideup.vi v10, v12, 1 ; RV64-NEXT: ret +; +; VISNI-LABEL: insertelt_c5_v8xi64_exact: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 8, e64, m1, ta, ma +; VISNI-NEXT: ri.vinsert.v.x v10, a0, 1 +; VISNI-NEXT: ret %v = insertelement <8 x i64> %vin, i64 %a, i32 5 ret <8 x i64> %v } @@ -691,6 +1009,13 @@ define <4 x bfloat> @insertelt_v4bf16_0(<4 x bfloat> %a, bfloat %y) { ; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_v4bf16_0: +; VISNI: # %bb.0: +; VISNI-NEXT: fmv.x.h a0, fa0 +; VISNI-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; VISNI-NEXT: vmv.s.x v8, a0 +; VISNI-NEXT: ret %b = insertelement <4 x bfloat> %a, bfloat %y, i32 0 ret <4 x bfloat> %b } @@ -703,6 +1028,13 @@ define <4 x bfloat> @insertelt_v4bf16_3(<4 x bfloat> %a, bfloat %y) { ; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_v4bf16_3: +; VISNI: # %bb.0: +; VISNI-NEXT: fmv.x.h a0, fa0 +; VISNI-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; VISNI-NEXT: ri.vinsert.v.x v8, a0, 3 +; VISNI-NEXT: ret %b = insertelement <4 x bfloat> %a, bfloat %y, i32 3 ret <4 x bfloat> %b } @@ -717,6 +1049,16 @@ define <4 x bfloat> @insertelt_v4bf16_idx(<4 x bfloat> %a, bfloat %y, i32 zeroex ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret +; +; VISNI-LABEL: insertelt_v4bf16_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: addi a1, a0, 1 +; VISNI-NEXT: fmv.x.h a2, fa0 +; VISNI-NEXT: vsetivli zero, 4, e16, m1, ta, ma +; VISNI-NEXT: vmv.s.x v9, a2 +; VISNI-NEXT: vsetvli zero, a1, e16, mf2, tu, ma +; VISNI-NEXT: vslideup.vx v8, v9, a0 +; VISNI-NEXT: ret %b = insertelement <4 x bfloat> %a, bfloat %y, i32 %idx ret <4 x bfloat> %b } @@ -734,6 +1076,12 @@ define <4 x half> @insertelt_v4f16_0(<4 x half> %a, half %y) { ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; 
ZVFHMIN-NEXT: vmv.s.x v8, a0 ; ZVFHMIN-NEXT: ret +; +; VISNI-LABEL: insertelt_v4f16_0: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; VISNI-NEXT: vfmv.s.f v8, fa0 +; VISNI-NEXT: ret %b = insertelement <4 x half> %a, half %y, i32 0 ret <4 x half> %b } @@ -753,6 +1101,13 @@ define <4 x half> @insertelt_v4f16_3(<4 x half> %a, half %y) { ; ZVFHMIN-NEXT: vmv.s.x v9, a0 ; ZVFHMIN-NEXT: vslideup.vi v8, v9, 3 ; ZVFHMIN-NEXT: ret +; +; VISNI-LABEL: insertelt_v4f16_3: +; VISNI: # %bb.0: +; VISNI-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; VISNI-NEXT: vfmv.s.f v9, fa0 +; VISNI-NEXT: vslideup.vi v8, v9, 3 +; VISNI-NEXT: ret %b = insertelement <4 x half> %a, half %y, i32 3 ret <4 x half> %b } @@ -776,6 +1131,15 @@ define <4 x half> @insertelt_v4f16_idx(<4 x half> %a, half %y, i32 zeroext %idx) ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, mf2, tu, ma ; ZVFHMIN-NEXT: vslideup.vx v8, v9, a0 ; ZVFHMIN-NEXT: ret +; +; VISNI-LABEL: insertelt_v4f16_idx: +; VISNI: # %bb.0: +; VISNI-NEXT: addi a1, a0, 1 +; VISNI-NEXT: vsetivli zero, 4, e16, m1, ta, ma +; VISNI-NEXT: vfmv.s.f v9, fa0 +; VISNI-NEXT: vsetvli zero, a1, e16, mf2, tu, ma +; VISNI-NEXT: vslideup.vx v8, v9, a0 +; VISNI-NEXT: ret %b = insertelement <4 x half> %a, half %y, i32 %idx ret <4 x half> %b }
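
The following is a minimal standalone reproducer distilled from the tests added above; it is not part of the patch, and the file/function names are only illustrative. It assumes an llc build that includes these changes and enables just the vector and XRivosVisni features (the RUN lines above additionally enable FP/bfloat features this snippet does not need). The expected instructions are taken verbatim from the VISNI check lines for insertelt_v4i32_3 and extractelt_v4i32.

; visni-repro.ll
; Run with:
;   llc -mtriple=riscv64 -mattr=+v,+experimental-xrivosvisni -verify-machineinstrs < visni-repro.ll

define <4 x i32> @insert_idx3(<4 x i32> %a, i32 %y) {
  ; expected: vsetivli zero, 4, e32, m1, ta, ma
  ;           ri.vinsert.v.x v8, a0, 3
  %b = insertelement <4 x i32> %a, i32 %y, i32 3
  ret <4 x i32> %b
}

define i32 @extract_idx2(<4 x i32> %a) {
  ; expected: vsetivli zero, 1, e32, m1, ta, ma
  ;           ri.vextract.x.v a0, v8, 2
  %b = extractelement <4 x i32> %a, i32 2
  ret i32 %b
}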