From f3116a56b104d51801d71029cc2452070c56fce5 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Tue, 18 Mar 2025 14:12:00 -0700
Subject: [PATCH 01/12] [RISCV] Initial codegen support for the XRivosVizip
 extension

This implements initial code generation support for the xrivosvizip
extension. A couple of things to note:
* The zipeven/zipodd matchers were recently rewritten to better match
  upstream style, so careful review there would be appreciated.
* The zipeven/zipodd cases don't yet support type coercion. This will
  be done in a future patch.
* I subsetted the unzip2a/b support in a way which makes it functional,
  but far from optimal. A further change will reintroduce some of the
  complexity once it's easy to test and show incremental change.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   | 126 ++++++-
 llvm/lib/Target/RISCV/RISCVISelLowering.h     |  10 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td |  42 +++
 .../fixed-vectors-shuffle-deinterleave2.ll    | 234 ++++++-----
 .../fixed-vectors-shuffle-int-interleave.ll   | 310 ++++++++++++++++++
 .../fixed-vectors-shuffle-zipeven-zipodd.ll   | 140 ++++++++
 6 files changed, 782 insertions(+), 80 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 27a4bbce1f5fc..db9535b1a081a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4553,8 +4553,10 @@ static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
 /// way through the source.
 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
                                 int &OddSrc, const RISCVSubtarget &Subtarget) {
-  // We need to be able to widen elements to the next larger integer type.
-  if (VT.getScalarSizeInBits() >= Subtarget.getELen())
+  // We need to be able to widen elements to the next larger integer type or
+  // use the zip2a instruction at e64.
+  if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
+      !Subtarget.hasVendorXRivosVizip())
     return false;
 
   int Size = Mask.size();
@@ -4611,6 +4613,43 @@ static bool isElementRotate(std::array<std::pair<int, int>, 2> &SrcInfo,
          SrcInfo[1].second - SrcInfo[0].second == (int)NumElts;
 }
 
+static bool isAlternating(std::array<std::pair<int, int>, 2> &SrcInfo,
+                          ArrayRef<int> Mask, bool &Polarity) {
+  int NumElts = Mask.size();
+  bool NonUndefFound = false;
+  for (unsigned i = 0; i != Mask.size(); ++i) {
+    int M = Mask[i];
+    if (M < 0)
+      continue;
+    int Src = M >= (int)NumElts;
+    int Diff = (int)i - (M % NumElts);
+    bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
+    if (!NonUndefFound) {
+      NonUndefFound = true;
+      Polarity = (C == i % 2);
+      continue;
+    }
+    if ((Polarity && C != i % 2) || (!Polarity && C == i % 2))
+      return false;
+  }
+  return true;
+}
+
+static bool isZipEven(std::array<std::pair<int, int>, 2> &SrcInfo,
+                      ArrayRef<int> Mask) {
+  bool Polarity;
+  return SrcInfo[0].second == 0 && SrcInfo[1].second == 1 &&
+         isAlternating(SrcInfo, Mask, Polarity) && Polarity;
+;
+}
+
+static bool isZipOdd(std::array<std::pair<int, int>, 2> &SrcInfo,
+                     ArrayRef<int> Mask) {
+  bool Polarity;
+  return SrcInfo[0].second == 0 && SrcInfo[1].second == -1 &&
+         isAlternating(SrcInfo, Mask, Polarity) && !Polarity;
+}
+
 // Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
 // 2, 4, 8 and the integer type Factor-times larger than VT's
 // element type must be a legal element type.
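Illustrative note for review: the mask shapes handled by the matchers added above (isZipEven, isZipOdd, and the zip2a form accepted by isInterleaveShuffle) are easiest to see in IR. The lines below are a hypothetical sketch, not part of the diff; the function name, the %a and %b arguments, and the i32 element type are invented for the example rather than taken from the patch's tests.

define void @zip_mask_examples(<4 x i32> %a, <4 x i32> %b) {
  ; zipeven keeps the even-indexed lanes of both sources: a0, b0, a2, b2
  %ze = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ; zipodd keeps the odd-indexed lanes of both sources: a1, b1, a3, b3
  %zo = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ; zip2a interleaves the low halves of the two sources: a0, b0, a1, b1
  %za = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret void
}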
@@ -4870,6 +4909,36 @@ static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
   return true;
 }
 
+static SDValue lowerVIZIP(unsigned Opc, SDValue Op0, SDValue Op1,
+                          const SDLoc &DL, SelectionDAG &DAG,
+                          const RISCVSubtarget &Subtarget) {
+  assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
+         RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc ||
+         RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc);
+  assert(Op0.getSimpleValueType() == Op1.getSimpleValueType());
+
+  MVT VT = Op0.getSimpleValueType();
+  MVT IntVT = VT.changeVectorElementTypeToInteger();
+  Op0 = DAG.getBitcast(IntVT, Op0);
+  Op1 = DAG.getBitcast(IntVT, Op1);
+
+  MVT ContainerVT = IntVT;
+  if (VT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
+    Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
+    Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
+  }
+
+  auto [Mask, VL] = getDefaultVLOps(IntVT, ContainerVT, DL, DAG, Subtarget);
+  SDValue Passthru = DAG.getUNDEF(ContainerVT);
+  SDValue Res =
+      DAG.getNode(Opc, DL, ContainerVT, Op0, Op1, Passthru, Mask, VL);
+  if (IntVT.isFixedLengthVector())
+    Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
+  Res = DAG.getBitcast(VT, Res);
+  return Res;
+}
+
 // Given a vector a, b, c, d return a vector Factor times longer
 // with Factor-1 undef's between elements. Ex:
 //   a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
@@ -5384,6 +5453,7 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
   SDLoc DL(Op);
   MVT XLenVT = Subtarget.getXLenVT();
   MVT VT = Op.getSimpleValueType();
+  EVT ElemVT = VT.getVectorElementType();
   unsigned NumElts = VT.getVectorNumElements();
   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
 
@@ -5556,6 +5626,25 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
     }
   }
 
+  // If this is an e64 deinterleave(2) (possibly with two distinct sources)
+  // match to the vunzip2a/vunzip2b.
+  unsigned Index = 0;
+  if (Subtarget.hasVendorXRivosVizip() && ElemVT == MVT::i64 &&
+      ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2, Index) &&
+      1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
+    MVT HalfVT = VT.getHalfNumVectorElementsVT();
+    unsigned Opc = Index == 0 ?
+      RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
+    V1 = lowerVIZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
+    V2 = lowerVIZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
+
+    V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
+                     DAG.getVectorIdxConstant(0, DL));
+    V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2,
+                     DAG.getVectorIdxConstant(0, DL));
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V1, V2);
+  }
+
   if (SDValue V = lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask,
                                                 Subtarget, DAG))
     return V;
@@ -5596,6 +5685,15 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                          DAG.getVectorIdxConstant(OddSrc % Size, DL));
     }
 
+    // Prefer vzip2a if available.
+    // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
+    if (Subtarget.hasVendorXRivosVizip()) {
+      EvenV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
+                          EvenV, DAG.getVectorIdxConstant(0, DL));
+      OddV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
+                         OddV, DAG.getVectorIdxConstant(0, DL));
+      return lowerVIZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget);
+    }
     return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
   }
 
@@ -5647,6 +5745,17 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
     return convertFromScalableVector(VT, Res, DAG, Subtarget);
   }
 
+  if (Subtarget.hasVendorXRivosVizip() && isZipEven(SrcInfo, Mask)) {
+    SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
+    SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
+    return lowerVIZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG, Subtarget);
+  }
+  if (Subtarget.hasVendorXRivosVizip() && isZipOdd(SrcInfo, Mask)) {
+    SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
+    SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
+    return lowerVIZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG, Subtarget);
+  }
+
   // Build the mask. Note that vslideup unconditionally preserves elements
   // below the slide amount in the destination, and thus those elements are
   // undefined in the mask. If the mask ends up all true (or undef), it
@@ -6710,7 +6819,7 @@ static bool hasPassthruOp(unsigned Opcode) {
          Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
          "not a RISC-V target specific op");
   static_assert(
-      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 127 &&
+      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 133 &&
       RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
       "adding target specific op should update this function");
   if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
@@ -6734,12 +6843,13 @@ static bool hasMaskOp(unsigned Opcode) {
          Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
          "not a RISC-V target specific op");
   static_assert(
-      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 127 &&
+      RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 133 &&
       RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
       "adding target specific op should update this function");
   if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
     return true;
-  if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
+  if (Opcode >= RISCVISD::VRGATHER_VX_VL &&
+      Opcode <= RISCVISD::LAST_VL_VECTOR_OP)
     return true;
   if (Opcode >= RISCVISD::STRICT_FADD_VL &&
       Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
@@ -21758,6 +21868,12 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(VZEXT_VL)
   NODE_NAME_CASE(VCPOP_VL)
   NODE_NAME_CASE(VFIRST_VL)
+  NODE_NAME_CASE(RI_VZIPEVEN_VL)
+  NODE_NAME_CASE(RI_VZIPODD_VL)
+  NODE_NAME_CASE(RI_VZIP2A_VL)
+  NODE_NAME_CASE(RI_VZIP2B_VL)
+  NODE_NAME_CASE(RI_VUNZIP2A_VL)
+  NODE_NAME_CASE(RI_VUNZIP2B_VL)
   NODE_NAME_CASE(READ_CSR)
   NODE_NAME_CASE(WRITE_CSR)
   NODE_NAME_CASE(SWAP_CSR)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index ffbc14a29006c..b271bc68427e9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -403,7 +403,15 @@ enum NodeType : unsigned {
   // vfirst.m with additional mask and VL operands.
VFIRST_VL, - LAST_VL_VECTOR_OP = VFIRST_VL, + // XRivosVizip + RI_VZIPEVEN_VL, + RI_VZIPODD_VL, + RI_VZIP2A_VL, + RI_VZIP2B_VL, + RI_VUNZIP2A_VL, + RI_VUNZIP2B_VL, + + LAST_VL_VECTOR_OP = RI_VUNZIP2B_VL, // Read VLENB CSR READ_VLENB, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td index 78c4ed6f00412..395fd917bfe42 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td @@ -67,6 +67,46 @@ defm RI_VUNZIP2A_V : VALU_IV_V<"ri.vunzip2a", 0b001000>; defm RI_VUNZIP2B_V : VALU_IV_V<"ri.vunzip2b", 0b011000>; } +// These are modeled after the int binop VL nodes +def ri_vzipeven_vl : SDNode<"RISCVISD::RI_VZIPEVEN_VL", SDT_RISCVIntBinOp_VL>; +def ri_vzipodd_vl : SDNode<"RISCVISD::RI_VZIPODD_VL", SDT_RISCVIntBinOp_VL>; +def ri_vzip2a_vl : SDNode<"RISCVISD::RI_VZIP2A_VL", SDT_RISCVIntBinOp_VL>; +def ri_vunzip2a_vl : SDNode<"RISCVISD::RI_VUNZIP2A_VL", SDT_RISCVIntBinOp_VL>; +def ri_vunzip2b_vl : SDNode<"RISCVISD::RI_VUNZIP2B_VL", SDT_RISCVIntBinOp_VL>; + +multiclass RIVPseudoVALU_VV { + foreach m = MxList in { + defvar mx = m.MX; + defm "" : VPseudoBinaryV_VV; + } +} + +let Predicates = [HasVendorXRivosVizip], + Constraints = "@earlyclobber $rd, $rd = $passthru" in { +defm PseudoRI_VZIPEVEN : RIVPseudoVALU_VV; +defm PseudoRI_VZIPODD : RIVPseudoVALU_VV; +defm PseudoRI_VZIP2A : RIVPseudoVALU_VV; +defm PseudoRI_VUNZIP2A : RIVPseudoVALU_VV; +defm PseudoRI_VUNZIP2B : RIVPseudoVALU_VV; +} + +multiclass RIVPatBinaryVL_VV vtilist = AllIntegerVectors, + bit isSEWAware = 0> { + foreach vti = vtilist in + let Predicates = GetVTypePredicates.Predicates in + def : VPatBinaryVL_V; +} + +defm : RIVPatBinaryVL_VV; +defm : RIVPatBinaryVL_VV; +defm : RIVPatBinaryVL_VV; +defm : RIVPatBinaryVL_VV; +defm : RIVPatBinaryVL_VV; + //===----------------------------------------------------------------------===// // XRivosVisni //===----------------------------------------------------------------------===// @@ -87,3 +127,5 @@ def RI_VEXTRACT : CustomRivosXVI<0b010111, OPMVV, (outs GPR:$rd), (ins VR:$vs2, uimm5:$imm), "ri.vextract.x.v", "$rd, $vs2, $imm">; } + + diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll index 9279e0a4d3a6c..2165c6025f7e7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll @@ -1,10 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvl256b \ ; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \ -; RUN: | FileCheck %s --check-prefixes=CHECK,V +; RUN: | FileCheck %s --check-prefixes=CHECK,V,V-NOZIP ; RUN: llc < %s -mtriple=riscv64 -mattr=+f,+zve32f,+zvfh,+zvl256b \ ; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \ ; RUN: | FileCheck %s --check-prefixes=CHECK,ZVE32F +; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvl256b,+experimental-xrivosvizip \ +; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefixes=CHECK,V,ZIP define void @vnsrl_0_i8(ptr %in, ptr %out) { ; CHECK-LABEL: vnsrl_0_i8: @@ -247,15 +250,15 @@ entry: } define void @vnsrl_0_i64(ptr %in, ptr %out) { -; V-LABEL: vnsrl_0_i64: -; V: # %bb.0: # %entry -; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; V-NEXT: vle64.v v8, (a0) -; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; V-NEXT: 
vslidedown.vi v9, v8, 2 -; V-NEXT: vslideup.vi v8, v9, 1 -; V-NEXT: vse64.v v8, (a1) -; V-NEXT: ret +; V-NOZIP-LABEL: vnsrl_0_i64: +; V-NOZIP: # %bb.0: # %entry +; V-NOZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; V-NOZIP-NEXT: vle64.v v8, (a0) +; V-NOZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; V-NOZIP-NEXT: vslidedown.vi v9, v8, 2 +; V-NOZIP-NEXT: vslideup.vi v8, v9, 1 +; V-NOZIP-NEXT: vse64.v v8, (a1) +; V-NOZIP-NEXT: ret ; ; ZVE32F-LABEL: vnsrl_0_i64: ; ZVE32F: # %bb.0: # %entry @@ -264,6 +267,18 @@ define void @vnsrl_0_i64(ptr %in, ptr %out) { ; ZVE32F-NEXT: sd a2, 0(a1) ; ZVE32F-NEXT: sd a0, 8(a1) ; ZVE32F-NEXT: ret +; +; ZIP-LABEL: vnsrl_0_i64: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; ZIP-NEXT: vle64.v v8, (a0) +; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZIP-NEXT: ri.vunzip2a.vv v10, v8, v9 +; ZIP-NEXT: vslidedown.vi v8, v8, 2 +; ZIP-NEXT: ri.vunzip2a.vv v11, v8, v9 +; ZIP-NEXT: vslideup.vi v10, v11, 1 +; ZIP-NEXT: vse64.v v10, (a1) +; ZIP-NEXT: ret entry: %0 = load <4 x i64>, ptr %in, align 8 %shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> @@ -272,16 +287,16 @@ entry: } define void @vnsrl_64_i64(ptr %in, ptr %out) { -; V-LABEL: vnsrl_64_i64: -; V: # %bb.0: # %entry -; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; V-NEXT: vle64.v v8, (a0) -; V-NEXT: vmv.v.i v0, 1 -; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; V-NEXT: vslidedown.vi v9, v8, 2 -; V-NEXT: vslidedown.vi v9, v8, 1, v0.t -; V-NEXT: vse64.v v9, (a1) -; V-NEXT: ret +; V-NOZIP-LABEL: vnsrl_64_i64: +; V-NOZIP: # %bb.0: # %entry +; V-NOZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; V-NOZIP-NEXT: vle64.v v8, (a0) +; V-NOZIP-NEXT: vmv.v.i v0, 1 +; V-NOZIP-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; V-NOZIP-NEXT: vslidedown.vi v9, v8, 2 +; V-NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t +; V-NOZIP-NEXT: vse64.v v9, (a1) +; V-NOZIP-NEXT: ret ; ; ZVE32F-LABEL: vnsrl_64_i64: ; ZVE32F: # %bb.0: # %entry @@ -290,6 +305,18 @@ define void @vnsrl_64_i64(ptr %in, ptr %out) { ; ZVE32F-NEXT: sd a2, 0(a1) ; ZVE32F-NEXT: sd a0, 8(a1) ; ZVE32F-NEXT: ret +; +; ZIP-LABEL: vnsrl_64_i64: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; ZIP-NEXT: vle64.v v8, (a0) +; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZIP-NEXT: ri.vunzip2b.vv v10, v8, v9 +; ZIP-NEXT: vslidedown.vi v8, v8, 2 +; ZIP-NEXT: ri.vunzip2b.vv v11, v8, v9 +; ZIP-NEXT: vslideup.vi v10, v11, 1 +; ZIP-NEXT: vse64.v v10, (a1) +; ZIP-NEXT: ret entry: %0 = load <4 x i64>, ptr %in, align 8 %shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> @@ -323,16 +350,16 @@ entry: } define void @vnsrl_64_double(ptr %in, ptr %out) { -; V-LABEL: vnsrl_64_double: -; V: # %bb.0: # %entry -; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; V-NEXT: vle64.v v8, (a0) -; V-NEXT: vmv.v.i v0, 1 -; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; V-NEXT: vslidedown.vi v9, v8, 2 -; V-NEXT: vslidedown.vi v9, v8, 1, v0.t -; V-NEXT: vse64.v v9, (a1) -; V-NEXT: ret +; V-NOZIP-LABEL: vnsrl_64_double: +; V-NOZIP: # %bb.0: # %entry +; V-NOZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; V-NOZIP-NEXT: vle64.v v8, (a0) +; V-NOZIP-NEXT: vmv.v.i v0, 1 +; V-NOZIP-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; V-NOZIP-NEXT: vslidedown.vi v9, v8, 2 +; V-NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t +; V-NOZIP-NEXT: vse64.v v9, (a1) +; V-NOZIP-NEXT: ret ; ; ZVE32F-LABEL: vnsrl_64_double: ; ZVE32F: # %bb.0: # %entry @@ -341,6 +368,16 @@ define void @vnsrl_64_double(ptr %in, ptr %out) { ; ZVE32F-NEXT: sd a2, 0(a1) ; ZVE32F-NEXT: sd a0, 8(a1) 
; ZVE32F-NEXT: ret +; +; ZIP-LABEL: vnsrl_64_double: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; ZIP-NEXT: vle64.v v8, (a0) +; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZIP-NEXT: vslidedown.vi v9, v8, 2 +; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9 +; ZIP-NEXT: vse64.v v10, (a1) +; ZIP-NEXT: ret entry: %0 = load <4 x double>, ptr %in, align 8 %shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> @@ -802,15 +839,15 @@ entry: } define void @vnsrl_32_i32_two_source(ptr %in0, ptr %in1, ptr %out) { -; V-LABEL: vnsrl_32_i32_two_source: -; V: # %bb.0: # %entry -; V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; V-NEXT: vle32.v v8, (a0) -; V-NEXT: vle32.v v9, (a1) -; V-NEXT: vmv.v.i v0, 1 -; V-NEXT: vslidedown.vi v9, v8, 1, v0.t -; V-NEXT: vse32.v v9, (a2) -; V-NEXT: ret +; V-NOZIP-LABEL: vnsrl_32_i32_two_source: +; V-NOZIP: # %bb.0: # %entry +; V-NOZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; V-NOZIP-NEXT: vle32.v v8, (a0) +; V-NOZIP-NEXT: vle32.v v9, (a1) +; V-NOZIP-NEXT: vmv.v.i v0, 1 +; V-NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t +; V-NOZIP-NEXT: vse32.v v9, (a2) +; V-NOZIP-NEXT: ret ; ; ZVE32F-LABEL: vnsrl_32_i32_two_source: ; ZVE32F: # %bb.0: # %entry @@ -821,6 +858,15 @@ define void @vnsrl_32_i32_two_source(ptr %in0, ptr %in1, ptr %out) { ; ZVE32F-NEXT: vslidedown.vi v9, v8, 1, v0.t ; ZVE32F-NEXT: vse32.v v9, (a2) ; ZVE32F-NEXT: ret +; +; ZIP-LABEL: vnsrl_32_i32_two_source: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; ZIP-NEXT: vle32.v v8, (a0) +; ZIP-NEXT: vle32.v v9, (a1) +; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9 +; ZIP-NEXT: vse32.v v10, (a2) +; ZIP-NEXT: ret entry: %0 = load <2 x i32>, ptr %in0, align 4 %1 = load <2 x i32>, ptr %in1, align 4 @@ -856,15 +902,15 @@ entry: } define void @vnsrl_32_float_two_source(ptr %in0, ptr %in1, ptr %out) { -; V-LABEL: vnsrl_32_float_two_source: -; V: # %bb.0: # %entry -; V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; V-NEXT: vle32.v v8, (a0) -; V-NEXT: vle32.v v9, (a1) -; V-NEXT: vmv.v.i v0, 1 -; V-NEXT: vslidedown.vi v9, v8, 1, v0.t -; V-NEXT: vse32.v v9, (a2) -; V-NEXT: ret +; V-NOZIP-LABEL: vnsrl_32_float_two_source: +; V-NOZIP: # %bb.0: # %entry +; V-NOZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; V-NOZIP-NEXT: vle32.v v8, (a0) +; V-NOZIP-NEXT: vle32.v v9, (a1) +; V-NOZIP-NEXT: vmv.v.i v0, 1 +; V-NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t +; V-NOZIP-NEXT: vse32.v v9, (a2) +; V-NOZIP-NEXT: ret ; ; ZVE32F-LABEL: vnsrl_32_float_two_source: ; ZVE32F: # %bb.0: # %entry @@ -875,6 +921,15 @@ define void @vnsrl_32_float_two_source(ptr %in0, ptr %in1, ptr %out) { ; ZVE32F-NEXT: vslidedown.vi v9, v8, 1, v0.t ; ZVE32F-NEXT: vse32.v v9, (a2) ; ZVE32F-NEXT: ret +; +; ZIP-LABEL: vnsrl_32_float_two_source: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; ZIP-NEXT: vle32.v v8, (a0) +; ZIP-NEXT: vle32.v v9, (a1) +; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9 +; ZIP-NEXT: vse32.v v10, (a2) +; ZIP-NEXT: ret entry: %0 = load <2 x float>, ptr %in0, align 4 %1 = load <2 x float>, ptr %in1, align 4 @@ -884,14 +939,14 @@ entry: } define void @vnsrl_0_i64_two_source(ptr %in0, ptr %in1, ptr %out) { -; V-LABEL: vnsrl_0_i64_two_source: -; V: # %bb.0: # %entry -; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; V-NEXT: vle64.v v8, (a0) -; V-NEXT: vle64.v v9, (a1) -; V-NEXT: vslideup.vi v8, v9, 1 -; V-NEXT: vse64.v v8, (a2) -; V-NEXT: ret +; V-NOZIP-LABEL: vnsrl_0_i64_two_source: +; V-NOZIP: # %bb.0: # %entry +; V-NOZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; V-NOZIP-NEXT: 
vle64.v v8, (a0) +; V-NOZIP-NEXT: vle64.v v9, (a1) +; V-NOZIP-NEXT: vslideup.vi v8, v9, 1 +; V-NOZIP-NEXT: vse64.v v8, (a2) +; V-NOZIP-NEXT: ret ; ; ZVE32F-LABEL: vnsrl_0_i64_two_source: ; ZVE32F: # %bb.0: # %entry @@ -901,6 +956,17 @@ define void @vnsrl_0_i64_two_source(ptr %in0, ptr %in1, ptr %out) { ; ZVE32F-NEXT: vslideup.vi v8, v9, 2 ; ZVE32F-NEXT: vse32.v v8, (a2) ; ZVE32F-NEXT: ret +; +; ZIP-LABEL: vnsrl_0_i64_two_source: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZIP-NEXT: vle64.v v8, (a0) +; ZIP-NEXT: vle64.v v9, (a1) +; ZIP-NEXT: ri.vunzip2a.vv v11, v8, v10 +; ZIP-NEXT: ri.vunzip2a.vv v10, v9, v8 +; ZIP-NEXT: vslideup.vi v11, v10, 1 +; ZIP-NEXT: vse64.v v11, (a2) +; ZIP-NEXT: ret entry: %0 = load <2 x i64>, ptr %in0, align 8 %1 = load <2 x i64>, ptr %in1, align 8 @@ -910,15 +976,15 @@ entry: } define void @vnsrl_64_i64_two_source(ptr %in0, ptr %in1, ptr %out) { -; V-LABEL: vnsrl_64_i64_two_source: -; V: # %bb.0: # %entry -; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; V-NEXT: vle64.v v8, (a0) -; V-NEXT: vle64.v v9, (a1) -; V-NEXT: vmv.v.i v0, 1 -; V-NEXT: vslidedown.vi v9, v8, 1, v0.t -; V-NEXT: vse64.v v9, (a2) -; V-NEXT: ret +; V-NOZIP-LABEL: vnsrl_64_i64_two_source: +; V-NOZIP: # %bb.0: # %entry +; V-NOZIP-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; V-NOZIP-NEXT: vle64.v v8, (a0) +; V-NOZIP-NEXT: vle64.v v9, (a1) +; V-NOZIP-NEXT: vmv.v.i v0, 1 +; V-NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t +; V-NOZIP-NEXT: vse64.v v9, (a2) +; V-NOZIP-NEXT: ret ; ; ZVE32F-LABEL: vnsrl_64_i64_two_source: ; ZVE32F: # %bb.0: # %entry @@ -929,6 +995,17 @@ define void @vnsrl_64_i64_two_source(ptr %in0, ptr %in1, ptr %out) { ; ZVE32F-NEXT: vslidedown.vi v9, v8, 2, v0.t ; ZVE32F-NEXT: vse32.v v9, (a2) ; ZVE32F-NEXT: ret +; +; ZIP-LABEL: vnsrl_64_i64_two_source: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZIP-NEXT: vle64.v v8, (a0) +; ZIP-NEXT: vle64.v v9, (a1) +; ZIP-NEXT: ri.vunzip2b.vv v11, v8, v10 +; ZIP-NEXT: ri.vunzip2b.vv v10, v9, v8 +; ZIP-NEXT: vslideup.vi v11, v10, 1 +; ZIP-NEXT: vse64.v v11, (a2) +; ZIP-NEXT: ret entry: %0 = load <2 x i64>, ptr %in0, align 8 %1 = load <2 x i64>, ptr %in1, align 8 @@ -963,15 +1040,15 @@ entry: } define void @vnsrl_64_double_two_source(ptr %in0, ptr %in1, ptr %out) { -; V-LABEL: vnsrl_64_double_two_source: -; V: # %bb.0: # %entry -; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; V-NEXT: vle64.v v8, (a0) -; V-NEXT: vle64.v v9, (a1) -; V-NEXT: vmv.v.i v0, 1 -; V-NEXT: vslidedown.vi v9, v8, 1, v0.t -; V-NEXT: vse64.v v9, (a2) -; V-NEXT: ret +; V-NOZIP-LABEL: vnsrl_64_double_two_source: +; V-NOZIP: # %bb.0: # %entry +; V-NOZIP-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; V-NOZIP-NEXT: vle64.v v8, (a0) +; V-NOZIP-NEXT: vle64.v v9, (a1) +; V-NOZIP-NEXT: vmv.v.i v0, 1 +; V-NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t +; V-NOZIP-NEXT: vse64.v v9, (a2) +; V-NOZIP-NEXT: ret ; ; ZVE32F-LABEL: vnsrl_64_double_two_source: ; ZVE32F: # %bb.0: # %entry @@ -980,6 +1057,15 @@ define void @vnsrl_64_double_two_source(ptr %in0, ptr %in1, ptr %out) { ; ZVE32F-NEXT: sd a0, 0(a2) ; ZVE32F-NEXT: sd a1, 8(a2) ; ZVE32F-NEXT: ret +; +; ZIP-LABEL: vnsrl_64_double_two_source: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZIP-NEXT: vle64.v v8, (a0) +; ZIP-NEXT: vle64.v v9, (a1) +; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9 +; ZIP-NEXT: vse64.v v10, (a2) +; ZIP-NEXT: ret entry: %0 = load <2 x double>, ptr %in0, align 8 %1 = load <2 x double>, ptr %in1, align 8 diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll index 93c00d5c03717..58967ed505ba9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll @@ -3,6 +3,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128 ; RUN: llc -mtriple=riscv32 -mattr=+v,+m,+zvl512b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512 ; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+zvl512b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512 +; RUN: llc -mtriple=riscv32 -mattr=+v,+m,+experimental-xrivosvizip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZIP,RV32-ZIP +; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+experimental-xrivosvizip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZIP,RV64-ZIP ; Test optimizing interleaves to widening arithmetic. @@ -15,6 +17,13 @@ define <4 x i8> @interleave_v2i8(<2 x i8> %x, <2 x i8> %y) { ; CHECK-NEXT: vwmaccu.vx v10, a0, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret +; +; ZIP-LABEL: interleave_v2i8: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9 +; ZIP-NEXT: vmv1r.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <2 x i8> %x, <2 x i8> %y, <4 x i32> ret <4 x i8> %a } @@ -28,6 +37,13 @@ define <4 x i16> @interleave_v2i16(<2 x i16> %x, <2 x i16> %y) { ; CHECK-NEXT: vwmaccu.vx v10, a0, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret +; +; ZIP-LABEL: interleave_v2i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9 +; ZIP-NEXT: vmv1r.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <2 x i16> %x, <2 x i16> %y, <4 x i32> ret <4 x i16> %a } @@ -42,6 +58,13 @@ define <4 x i32> @interleave_v2i32(<2 x i32> %x, <2 x i32> %y) { ; CHECK-NEXT: vwmaccu.vx v10, a0, v8 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret +; +; ZIP-LABEL: interleave_v2i32: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v10, v9, v8 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> ret <4 x i32> %a } @@ -72,6 +95,14 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) { ; V512-NEXT: vslideup.vi v11, v8, 1 ; V512-NEXT: vmerge.vvm v8, v11, v10, v0 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v2i64: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZIP-NEXT: vmv1r.v v12, v9 +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <2 x i64> %x, <2 x i64> %y, <4 x i32> ret <4 x i64> %a } @@ -95,6 +126,13 @@ define <8 x i8> @interleave_v4i8(<4 x i8> %x, <4 x i8> %y) { ; V512-NEXT: vwmaccu.vx v10, a0, v8 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v4i8: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v10, v9, v8 +; ZIP-NEXT: vmv1r.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32> ret <8 x i8> %a } @@ -118,6 +156,13 @@ define <8 x i16> @interleave_v4i16(<4 x i16> %x, <4 x i16> %y) { ; V512-NEXT: vwmaccu.vx v10, a0, v9 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v4i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9 +; ZIP-NEXT: 
vmv.v.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> ret <8 x i16> %a } @@ -141,6 +186,14 @@ define <8 x i32> @interleave_v4i32(<4 x i32> %x, <4 x i32> %y) { ; V512-NEXT: vwmaccu.vx v10, a0, v9 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v4i32: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZIP-NEXT: vmv1r.v v12, v9 +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <4 x i32> %x, <4 x i32> %y, <8 x i32> ret <8 x i32> %a } @@ -167,6 +220,15 @@ define <4 x i32> @interleave_v4i32_offset_2(<4 x i32> %x, <4 x i32> %y) { ; V512-NEXT: vwmaccu.vx v9, a0, v10 ; V512-NEXT: vmv1r.v v8, v9 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v4i32_offset_2: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; ZIP-NEXT: vslidedown.vi v10, v9, 2 +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10 +; ZIP-NEXT: vmv.v.v v8, v9 +; ZIP-NEXT: ret %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> ret <4 x i32> %a } @@ -198,6 +260,17 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) { ; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; V512-NEXT: vmerge.vvm v8, v9, v10, v0 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v4i32_offset_1: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; ZIP-NEXT: vmv.v.i v0, 8 +; ZIP-NEXT: vmv1r.v v10, v9 +; ZIP-NEXT: vslideup.vi v10, v9, 1, v0.t +; ZIP-NEXT: vmv.v.i v0, 10 +; ZIP-NEXT: ri.vzip2a.vv v11, v8, v9 +; ZIP-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZIP-NEXT: ret %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> ret <4 x i32> %a } @@ -220,6 +293,13 @@ define <16 x i8> @interleave_v8i8(<8 x i8> %x, <8 x i8> %y) { ; V512-NEXT: vwmaccu.vx v10, a0, v9 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v8i8: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <8 x i8> %x, <8 x i8> %y, <16 x i32> ret <16 x i8> %a } @@ -244,6 +324,14 @@ define <16 x i16> @interleave_v8i16(<8 x i16> %x, <8 x i16> %y) { ; V512-NEXT: vwmaccu.vx v10, a0, v8 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v8i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZIP-NEXT: vmv1r.v v12, v9 +; ZIP-NEXT: ri.vzip2a.vv v10, v12, v8 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <8 x i16> %x, <8 x i16> %y, <16 x i32> ret <16 x i16> %a } @@ -267,6 +355,14 @@ define <16 x i32> @interleave_v8i32(<8 x i32> %x, <8 x i32> %y) { ; V512-NEXT: vwmaccu.vx v10, a0, v9 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v8i32: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; ZIP-NEXT: vmv2r.v v16, v10 +; ZIP-NEXT: ri.vzip2a.vv v12, v8, v16 +; ZIP-NEXT: vmv.v.v v8, v12 +; ZIP-NEXT: ret %a = shufflevector <8 x i32> %x, <8 x i32> %y, <16 x i32> ret <16 x i32> %a } @@ -290,6 +386,16 @@ define <32 x i8> @interleave_v16i8(<16 x i8> %x, <16 x i8> %y) { ; V512-NEXT: vwmaccu.vx v10, a0, v9 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v16i8: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; ZIP-NEXT: vmv1r.v v12, v9 +; ZIP-NEXT: li a0, 32 +; ZIP-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <16 x i8> %x, <16 x i8> %y, <32 x i32> 
ret <32 x i8> %a } @@ -313,6 +419,16 @@ define <32 x i16> @interleave_v16i16(<16 x i16> %x, <16 x i16> %y) { ; V512-NEXT: vwmaccu.vx v10, a0, v9 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v16i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; ZIP-NEXT: vmv2r.v v16, v10 +; ZIP-NEXT: li a0, 32 +; ZIP-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v12, v8, v16 +; ZIP-NEXT: vmv.v.v v8, v12 +; ZIP-NEXT: ret %a = shufflevector <16 x i16> %x, <16 x i16> %y, <32 x i32> ret <32 x i16> %a } @@ -337,6 +453,16 @@ define <32 x i32> @interleave_v16i32(<16 x i32> %x, <16 x i32> %y) { ; V512-NEXT: li a0, -1 ; V512-NEXT: vwmaccu.vx v8, a0, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v16i32: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; ZIP-NEXT: vmv4r.v v24, v12 +; ZIP-NEXT: li a0, 32 +; ZIP-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v16, v8, v24 +; ZIP-NEXT: vmv.v.v v8, v16 +; ZIP-NEXT: ret %a = shufflevector <16 x i32> %x, <16 x i32> %y, <32 x i32> ret <32 x i32> %a } @@ -363,6 +489,16 @@ define <64 x i8> @interleave_v32i8(<32 x i8> %x, <32 x i8> %y) { ; V512-NEXT: vwmaccu.vx v10, a0, v9 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v32i8: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; ZIP-NEXT: vmv2r.v v16, v10 +; ZIP-NEXT: li a0, 64 +; ZIP-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v12, v8, v16 +; ZIP-NEXT: vmv.v.v v8, v12 +; ZIP-NEXT: ret %a = shufflevector <32 x i8> %x, <32 x i8> %y, <64 x i32> ret <64 x i8> %a } @@ -391,6 +527,16 @@ define <64 x i16> @interleave_v32i16(<32 x i16> %x, <32 x i16> %y) { ; V512-NEXT: li a0, -1 ; V512-NEXT: vwmaccu.vx v8, a0, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v32i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; ZIP-NEXT: vmv4r.v v24, v12 +; ZIP-NEXT: li a0, 64 +; ZIP-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v16, v8, v24 +; ZIP-NEXT: vmv.v.v v8, v16 +; ZIP-NEXT: ret %a = shufflevector <32 x i16> %x, <32 x i16> %y, <64 x i32> ret <64 x i16> %a } @@ -446,6 +592,78 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; V512-NEXT: li a0, -1 ; V512-NEXT: vwmaccu.vx v8, a0, v12 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v32i32: +; ZIP: # %bb.0: +; ZIP-NEXT: addi sp, sp, -16 +; ZIP-NEXT: .cfi_def_cfa_offset 16 +; ZIP-NEXT: csrr a0, vlenb +; ZIP-NEXT: slli a0, a0, 5 +; ZIP-NEXT: sub sp, sp, a0 +; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; ZIP-NEXT: csrr a0, vlenb +; ZIP-NEXT: li a1, 24 +; ZIP-NEXT: mul a0, a0, a1 +; ZIP-NEXT: add a0, sp, a0 +; ZIP-NEXT: addi a0, a0, 16 +; ZIP-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZIP-NEXT: addi a0, sp, 16 +; ZIP-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZIP-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; ZIP-NEXT: vslidedown.vi v24, v8, 16 +; ZIP-NEXT: li a0, 32 +; ZIP-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v16, v24, v0 +; ZIP-NEXT: csrr a1, vlenb +; ZIP-NEXT: li a2, 24 +; ZIP-NEXT: mul a1, a1, a2 +; ZIP-NEXT: add a1, sp, a1 +; ZIP-NEXT: addi a1, a1, 16 +; ZIP-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZIP-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; ZIP-NEXT: vslidedown.vi v24, v24, 16 +; ZIP-NEXT: csrr a1, vlenb +; ZIP-NEXT: slli a1, a1, 4 +; ZIP-NEXT: add a1, sp, a1 +; ZIP-NEXT: addi a1, a1, 16 +; ZIP-NEXT: vs8r.v v24, (a1) # 
Unknown-size Folded Spill +; ZIP-NEXT: lui a1, 699051 +; ZIP-NEXT: addi a1, a1, -1366 +; ZIP-NEXT: vmv.s.x v0, a1 +; ZIP-NEXT: csrr a1, vlenb +; ZIP-NEXT: slli a1, a1, 3 +; ZIP-NEXT: add a1, sp, a1 +; ZIP-NEXT: addi a1, a1, 16 +; ZIP-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; ZIP-NEXT: csrr a1, vlenb +; ZIP-NEXT: slli a1, a1, 4 +; ZIP-NEXT: add a1, sp, a1 +; ZIP-NEXT: addi a1, a1, 16 +; ZIP-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZIP-NEXT: csrr a1, vlenb +; ZIP-NEXT: slli a1, a1, 3 +; ZIP-NEXT: add a1, sp, a1 +; ZIP-NEXT: addi a1, a1, 16 +; ZIP-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZIP-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; ZIP-NEXT: ri.vzip2a.vv v16, v8, v24, v0.t +; ZIP-NEXT: csrr a0, vlenb +; ZIP-NEXT: li a1, 24 +; ZIP-NEXT: mul a0, a0, a1 +; ZIP-NEXT: add a0, sp, a0 +; ZIP-NEXT: addi a0, a0, 16 +; ZIP-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZIP-NEXT: addi a0, sp, 16 +; ZIP-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZIP-NEXT: ri.vzip2a.vv v0, v8, v24 +; ZIP-NEXT: vmv.v.v v8, v0 +; ZIP-NEXT: csrr a0, vlenb +; ZIP-NEXT: slli a0, a0, 5 +; ZIP-NEXT: add sp, sp, a0 +; ZIP-NEXT: .cfi_def_cfa sp, 16 +; ZIP-NEXT: addi sp, sp, 16 +; ZIP-NEXT: .cfi_def_cfa_offset 0 +; ZIP-NEXT: ret %a = shufflevector <32 x i32> %x, <32 x i32> %y, <64 x i32> ret <64 x i32> %a } @@ -471,6 +689,15 @@ define <4 x i8> @unary_interleave_v4i8(<4 x i8> %x) { ; V512-NEXT: vwmaccu.vx v9, a0, v10 ; V512-NEXT: vmv1r.v v8, v9 ; V512-NEXT: ret +; +; ZIP-LABEL: unary_interleave_v4i8: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; ZIP-NEXT: vslidedown.vi v10, v8, 2 +; ZIP-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10 +; ZIP-NEXT: vmv1r.v v8, v9 +; ZIP-NEXT: ret %a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> ret <4 x i8> %a } @@ -498,6 +725,17 @@ define <4 x i8> @unary_interleave_v4i8_invalid(<4 x i8> %x) { ; V512-NEXT: vrgather.vv v9, v8, v10 ; V512-NEXT: vmv1r.v v8, v9 ; V512-NEXT: ret +; +; ZIP-LABEL: unary_interleave_v4i8_invalid: +; ZIP: # %bb.0: +; ZIP-NEXT: lui a0, 16 +; ZIP-NEXT: addi a0, a0, 768 +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vmv.s.x v10, a0 +; ZIP-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; ZIP-NEXT: vrgather.vv v9, v8, v10 +; ZIP-NEXT: vmv1r.v v8, v9 +; ZIP-NEXT: ret %a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> ret <4 x i8> %a } @@ -523,6 +761,15 @@ define <4 x i16> @unary_interleave_v4i16(<4 x i16> %x) { ; V512-NEXT: vwmaccu.vx v9, a0, v10 ; V512-NEXT: vmv1r.v v8, v9 ; V512-NEXT: ret +; +; ZIP-LABEL: unary_interleave_v4i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; ZIP-NEXT: vslidedown.vi v10, v8, 2 +; ZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10 +; ZIP-NEXT: vmv1r.v v8, v9 +; ZIP-NEXT: ret %a = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> ret <4 x i16> %a } @@ -548,6 +795,15 @@ define <4 x i32> @unary_interleave_v4i32(<4 x i32> %x) { ; V512-NEXT: vwmaccu.vx v9, a0, v10 ; V512-NEXT: vmv1r.v v8, v9 ; V512-NEXT: ret +; +; ZIP-LABEL: unary_interleave_v4i32: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; ZIP-NEXT: vslidedown.vi v10, v8, 2 +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10 +; ZIP-NEXT: vmv.v.v v8, v9 +; ZIP-NEXT: ret %a = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> ret <4 x i32> %a } @@ -590,6 +846,15 @@ define <4 x i64> @unary_interleave_v4i64(<4 x i64> %x) { ; RV64-V512-NEXT: vrgather.vv 
v9, v8, v10 ; RV64-V512-NEXT: vmv.v.v v8, v9 ; RV64-V512-NEXT: ret +; +; ZIP-LABEL: unary_interleave_v4i64: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma +; ZIP-NEXT: vslidedown.vi v12, v8, 2 +; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> ret <4 x i64> %a } @@ -615,6 +880,15 @@ define <8 x i8> @unary_interleave_v8i8(<8 x i8> %x) { ; V512-NEXT: vwmaccu.vx v9, a0, v10 ; V512-NEXT: vmv1r.v v8, v9 ; V512-NEXT: ret +; +; ZIP-LABEL: unary_interleave_v8i8: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; ZIP-NEXT: vslidedown.vi v10, v8, 4 +; ZIP-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10 +; ZIP-NEXT: vmv1r.v v8, v9 +; ZIP-NEXT: ret %a = shufflevector <8 x i8> %x, <8 x i8> poison, <8 x i32> ret <8 x i8> %a } @@ -640,6 +914,15 @@ define <8 x i16> @unary_interleave_v8i16(<8 x i16> %x) { ; V512-NEXT: vwmaccu.vx v9, a0, v8 ; V512-NEXT: vmv1r.v v8, v9 ; V512-NEXT: ret +; +; ZIP-LABEL: unary_interleave_v8i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 4, e16, m1, ta, ma +; ZIP-NEXT: vslidedown.vi v10, v8, 4 +; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v9, v10, v8 +; ZIP-NEXT: vmv.v.v v8, v9 +; ZIP-NEXT: ret %a = shufflevector <8 x i16> %x, <8 x i16> poison, <8 x i32> ret <8 x i16> %a } @@ -665,6 +948,15 @@ define <8 x i32> @unary_interleave_v8i32(<8 x i32> %x) { ; V512-NEXT: vwmaccu.vx v9, a0, v10 ; V512-NEXT: vmv1r.v v8, v9 ; V512-NEXT: ret +; +; ZIP-LABEL: unary_interleave_v8i32: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 4, e32, m2, ta, ma +; ZIP-NEXT: vslidedown.vi v12, v8, 4 +; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> ret <8 x i32> %a } @@ -679,6 +971,14 @@ define <4 x i8> @unary_interleave_10uu_v4i8(<4 x i8> %x) { ; CHECK-NEXT: vsll.vi v8, v8, 8 ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZIP-LABEL: unary_interleave_10uu_v4i8: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZIP-NEXT: vsrl.vi v9, v8, 8 +; ZIP-NEXT: vsll.vi v8, v8, 8 +; ZIP-NEXT: vor.vv v8, v8, v9 +; ZIP-NEXT: ret %a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> ret <4 x i8> %a } @@ -702,6 +1002,14 @@ define <16 x i16> @interleave_slp(<8 x i16> %v0, <8 x i16> %v1) { ; V512-NEXT: vwmaccu.vx v10, a0, v9 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_slp: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZIP-NEXT: vmv1r.v v12, v9 +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret entry: %v2 = shufflevector <8 x i16> %v0, <8 x i16> poison, <16 x i32> %v3 = shufflevector <8 x i16> %v1, <8 x i16> poison, <16 x i32> @@ -711,4 +1019,6 @@ entry: ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; RV32-V128: {{.*}} +; RV32-ZIP: {{.*}} ; RV64-V128: {{.*}} +; RV64-ZIP: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll index 75eb75fbd65b1..4c64af3fa1b8e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-xrivosvizip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZIP,ZIP-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-xrivosvizip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZIP,ZIP-RV64 define <4 x i32> @zipeven_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: zipeven_v4i32: @@ -9,6 +11,13 @@ define <4 x i32> @zipeven_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vslideup.vi v8, v9, 1, v0.t ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v4i32: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzipeven.vv v10, v8, v9 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %c @@ -22,6 +31,13 @@ define <4 x i32> @zipeven_v4i32_swapped(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v4i32_swapped: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzipeven.vv v10, v9, v8 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %c @@ -35,6 +51,13 @@ define <4 x i64> @zipeven_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vslideup.vi v8, v10, 1, v0.t ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v4i64: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZIP-NEXT: ri.vzipeven.vv v12, v8, v10 +; ZIP-NEXT: vmv.v.v v8, v12 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> ret <4 x i64> %c @@ -64,6 +87,30 @@ define <4 x half> @zipeven_v4f16(<4 x half> %a, <4 x half> %b) { ; RV64-NEXT: sh a1, 4(a0) ; RV64-NEXT: sh a2, 6(a0) ; RV64-NEXT: ret +; +; ZIP-RV32-LABEL: zipeven_v4f16: +; ZIP-RV32: # %bb.0: # %entry +; ZIP-RV32-NEXT: lh a3, 0(a1) +; ZIP-RV32-NEXT: lh a4, 0(a2) +; ZIP-RV32-NEXT: lh a1, 8(a1) +; ZIP-RV32-NEXT: lh a2, 8(a2) +; ZIP-RV32-NEXT: sh a3, 0(a0) +; ZIP-RV32-NEXT: sh a4, 2(a0) +; ZIP-RV32-NEXT: sh a1, 4(a0) +; ZIP-RV32-NEXT: sh a2, 6(a0) +; ZIP-RV32-NEXT: ret +; +; ZIP-RV64-LABEL: zipeven_v4f16: +; ZIP-RV64: # %bb.0: # %entry +; ZIP-RV64-NEXT: lh a3, 0(a1) +; ZIP-RV64-NEXT: lh a4, 0(a2) +; ZIP-RV64-NEXT: lh a1, 16(a1) +; ZIP-RV64-NEXT: lh a2, 16(a2) +; ZIP-RV64-NEXT: sh a3, 0(a0) +; ZIP-RV64-NEXT: sh a4, 2(a0) +; ZIP-RV64-NEXT: sh a1, 4(a0) +; ZIP-RV64-NEXT: sh a2, 6(a0) +; ZIP-RV64-NEXT: ret entry: %c = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> ret <4 x half> %c @@ -76,6 +123,13 @@ define <4 x float> @zipeven_v4f32(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vslideup.vi v8, 
v9, 1, v0.t ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v4f32: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzipeven.vv v10, v8, v9 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %c @@ -89,6 +143,13 @@ define <4 x double> @zipeven_v4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vslideup.vi v8, v10, 1, v0.t ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v4f64: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZIP-NEXT: ri.vzipeven.vv v12, v8, v10 +; ZIP-NEXT: vmv.v.v v8, v12 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %c @@ -103,6 +164,13 @@ define <4 x i32> @zipodd_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vslidedown.vi v9, v8, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipodd_v4i32: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %c @@ -115,6 +183,13 @@ define <4 x i32> @zipodd_v4i32_swapped(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vmv.v.i v0, 5 ; CHECK-NEXT: vslidedown.vi v8, v9, 1, v0.t ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipodd_v4i32_swapped: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzipodd.vv v10, v9, v8 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %c @@ -124,6 +199,10 @@ define <4 x i32> @zipeven_v4i32_single(<4 x i32> %a) { ; CHECK-LABEL: zipeven_v4i32_single: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v4i32_single: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> ret <4 x i32> %c @@ -135,6 +214,12 @@ define <4 x i32> @zipodd_v4i32_single(<4 x i32> %a) { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipodd_v4i32_single: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vslidedown.vi v8, v8, 1 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> ret <4 x i32> %c @@ -147,6 +232,13 @@ define <4 x i32> @zipodd_v4i32_both(<4 x i32> %a) { ; CHECK-NEXT: vmv.v.i v0, 5 ; CHECK-NEXT: vslidedown.vi v8, v8, 1, v0.t ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipodd_v4i32_both: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzipodd.vv v9, v8, v8 +; ZIP-NEXT: vmv.v.v v8, v9 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> ret <4 x i32> %c @@ -161,6 +253,13 @@ define <4 x i32> @zipeven_v4i32_both(<4 x i32> %a) { ; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v4i32_both: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzipeven.vv v9, v8, v8 +; ZIP-NEXT: vmv.v.v v8, v9 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> ret <4 x i32> %c @@ -172,6 +271,12 @@ define <4 x i32> @zipeven_v4i32_partial(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: ret +; +; ZIP-LABEL: 
zipeven_v4i32_partial: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; ZIP-NEXT: vslideup.vi v8, v9, 1 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %c @@ -185,6 +290,13 @@ define <4 x i32> @zipodd_v4i32_partial(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vslidedown.vi v9, v8, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipodd_v4i32_partial: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %c @@ -198,6 +310,13 @@ define <8 x i32> @zipeven_v8i32(<8 x i32> %v1, <8 x i32> %v2) { ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 1, v0.t ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v8i32: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZIP-NEXT: ri.vzipeven.vv v12, v8, v10 +; ZIP-NEXT: vmv.v.v v8, v12 +; ZIP-NEXT: ret %out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> ret <8 x i32> %out } @@ -211,6 +330,13 @@ define <8 x i32> @zipodd_v8i32(<8 x i32> %v1, <8 x i32> %v2) { ; CHECK-NEXT: vslidedown.vi v10, v8, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipodd_v8i32: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZIP-NEXT: ri.vzipodd.vv v12, v8, v10 +; ZIP-NEXT: vmv.v.v v8, v12 +; ZIP-NEXT: ret %out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> ret <8 x i32> %out } @@ -224,6 +350,13 @@ define <16 x i64> @zipeven_v16i64(<16 x i64> %v1, <16 x i64> %v2) { ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 1, v0.t ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v16i64: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; ZIP-NEXT: ri.vzipeven.vv v24, v8, v16 +; ZIP-NEXT: vmv.v.v v8, v24 +; ZIP-NEXT: ret %out = shufflevector <16 x i64> %v1, <16 x i64> %v2, <16 x i32> ret <16 x i64> %out } @@ -238,6 +371,13 @@ define <16 x i64> @zipodd_v16i64(<16 x i64> %v1, <16 x i64> %v2) { ; CHECK-NEXT: vslidedown.vi v16, v8, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipodd_v16i64: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; ZIP-NEXT: ri.vzipodd.vv v24, v8, v16 +; ZIP-NEXT: vmv.v.v v8, v24 +; ZIP-NEXT: ret %out = shufflevector <16 x i64> %v1, <16 x i64> %v2, <16 x i32> ret <16 x i64> %out } From 5da5c6c18c1d0d7c0602174175c72a9ff6812c4b Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 18 Mar 2025 16:18:07 -0700 Subject: [PATCH 02/12] clang-format --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 30 +++++++++++---------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index db9535b1a081a..3356bc4688ddc 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4620,7 +4620,7 @@ static bool isAlternating(std::array, 2> &SrcInfo, for (unsigned i = 0; i != Mask.size(); ++i) { int M = Mask[i]; if (M < 0) - continue; + continue; int Src = M >= (int)NumElts; int Diff = (int)i - (M % NumElts); bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second; @@ -4639,15 +4639,15 @@ static bool isZipEven(std::array, 2> &SrcInfo, ArrayRef Mask) { bool Polarity; return SrcInfo[0].second == 0 && SrcInfo[1].second == 1 && - isAlternating(SrcInfo, Mask, Polarity) && Polarity; -; + isAlternating(SrcInfo, Mask, 
Polarity) && Polarity; + ; } static bool isZipOdd(std::array, 2> &SrcInfo, ArrayRef Mask) { bool Polarity; return SrcInfo[0].second == 0 && SrcInfo[1].second == -1 && - isAlternating(SrcInfo, Mask, Polarity) && !Polarity; + isAlternating(SrcInfo, Mask, Polarity) && !Polarity; } // Lower a deinterleave shuffle to SRL and TRUNC. Factor must be @@ -4931,8 +4931,7 @@ static SDValue lowerVIZIP(unsigned Opc, SDValue Op0, SDValue Op1, auto [Mask, VL] = getDefaultVLOps(IntVT, ContainerVT, DL, DAG, Subtarget); SDValue Passthru = DAG.getUNDEF(ContainerVT); - SDValue Res = - DAG.getNode(Opc, DL, ContainerVT, Op0, Op1, Passthru, Mask, VL); + SDValue Res = DAG.getNode(Opc, DL, ContainerVT, Op0, Op1, Passthru, Mask, VL); if (IntVT.isFixedLengthVector()) Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget); Res = DAG.getBitcast(VT, Res); @@ -5633,8 +5632,8 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2, Index) && 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) { MVT HalfVT = VT.getHalfNumVectorElementsVT(); - unsigned Opc = Index == 0 ? - RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL; + unsigned Opc = + Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL; V1 = lowerVIZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget); V2 = lowerVIZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget); @@ -5687,12 +5686,13 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, // Prefer vzip2a if available. // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow. - if (Subtarget.hasVendorXRivosVizip()) { + if (Subtarget.hasVendorXRivosVizip()) { EvenV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), EvenV, DAG.getVectorIdxConstant(0, DL)); - OddV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), - OddV, DAG.getVectorIdxConstant(0, DL)); - return lowerVIZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget); + OddV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), OddV, + DAG.getVectorIdxConstant(0, DL)); + return lowerVIZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, + Subtarget); } return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget); } @@ -5748,12 +5748,14 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, if (Subtarget.hasVendorXRivosVizip() && isZipEven(SrcInfo, Mask)) { SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2; SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2; - return lowerVIZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG, Subtarget); + return lowerVIZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG, + Subtarget); } if (Subtarget.hasVendorXRivosVizip() && isZipOdd(SrcInfo, Mask)) { SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2; SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2; - return lowerVIZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG, Subtarget); + return lowerVIZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG, + Subtarget); } // Build the mask. 
Note that vslideup unconditionally preserves elements From 99cb8c30a35c88d1b7a50427c1e9a9105d702e81 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 21 Mar 2025 09:58:24 -0700 Subject: [PATCH 03/12] Remove unused RI_VZIP2B_VL ISD --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 9 ++++----- llvm/lib/Target/RISCV/RISCVISelLowering.h | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 3356bc4688ddc..4b5669777c37a 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4913,8 +4913,8 @@ static SDValue lowerVIZIP(unsigned Opc, SDValue Op0, SDValue Op1, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc || - RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc || - RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc); + RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2A_VL == Opc || + RISCVISD::RI_VUNZIP2B_VL == Opc); assert(Op0.getSimpleValueType() == Op1.getSimpleValueType()); MVT VT = Op0.getSimpleValueType(); @@ -6821,7 +6821,7 @@ static bool hasPassthruOp(unsigned Opcode) { Opcode <= RISCVISD::LAST_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert( - RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 133 && + RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 132 && RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 && "adding target specific op should update this function"); if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL) @@ -6845,7 +6845,7 @@ static bool hasMaskOp(unsigned Opcode) { Opcode <= RISCVISD::LAST_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert( - RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 133 && + RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 132 && RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 && "adding target specific op should update this function"); if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL) @@ -21873,7 +21873,6 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(RI_VZIPEVEN_VL) NODE_NAME_CASE(RI_VZIPODD_VL) NODE_NAME_CASE(RI_VZIP2A_VL) - NODE_NAME_CASE(RI_VZIP2B_VL) NODE_NAME_CASE(RI_VUNZIP2A_VL) NODE_NAME_CASE(RI_VUNZIP2B_VL) NODE_NAME_CASE(READ_CSR) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index b271bc68427e9..cd0171a5b4dd0 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -407,7 +407,6 @@ enum NodeType : unsigned { RI_VZIPEVEN_VL, RI_VZIPODD_VL, RI_VZIP2A_VL, - RI_VZIP2B_VL, RI_VUNZIP2A_VL, RI_VUNZIP2B_VL, From e13929704b90c8ecdfce0427d67d979c6c44205f Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 21 Mar 2025 10:14:01 -0700 Subject: [PATCH 04/12] Remove vunzip2a/b lowering for now --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 28 +-- llvm/lib/Target/RISCV/RISCVISelLowering.h | 4 +- llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td | 8 - .../fixed-vectors-shuffle-deinterleave2.ll | 234 ++++++------------ 4 files changed, 78 insertions(+), 196 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4b5669777c37a..e3289c854fdce 100644 --- 
a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4913,8 +4913,7 @@ static SDValue lowerVIZIP(unsigned Opc, SDValue Op0, SDValue Op1, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc || - RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2A_VL == Opc || - RISCVISD::RI_VUNZIP2B_VL == Opc); + RISCVISD::RI_VZIP2A_VL == Opc); assert(Op0.getSimpleValueType() == Op1.getSimpleValueType()); MVT VT = Op0.getSimpleValueType(); @@ -5452,7 +5451,6 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, SDLoc DL(Op); MVT XLenVT = Subtarget.getXLenVT(); MVT VT = Op.getSimpleValueType(); - EVT ElemVT = VT.getVectorElementType(); unsigned NumElts = VT.getVectorNumElements(); ShuffleVectorSDNode *SVN = cast(Op.getNode()); @@ -5625,24 +5623,6 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, } } - // If this is an e64 deinterleave(2) (possibly with two distinct sources) - // match to the vunzip2a/vunzip2b. - unsigned Index = 0; - if (Subtarget.hasVendorXRivosVizip() && ElemVT == MVT::i64 && - ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2, Index) && - 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) { - MVT HalfVT = VT.getHalfNumVectorElementsVT(); - unsigned Opc = - Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL; - V1 = lowerVIZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget); - V2 = lowerVIZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget); - - V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1, - DAG.getVectorIdxConstant(0, DL)); - V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2, - DAG.getVectorIdxConstant(0, DL)); - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V1, V2); - } if (SDValue V = lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG)) @@ -6821,7 +6801,7 @@ static bool hasPassthruOp(unsigned Opcode) { Opcode <= RISCVISD::LAST_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert( - RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 132 && + RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 130 && RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 && "adding target specific op should update this function"); if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL) @@ -6845,7 +6825,7 @@ static bool hasMaskOp(unsigned Opcode) { Opcode <= RISCVISD::LAST_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert( - RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 132 && + RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 130 && RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 && "adding target specific op should update this function"); if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL) @@ -21873,8 +21853,6 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(RI_VZIPEVEN_VL) NODE_NAME_CASE(RI_VZIPODD_VL) NODE_NAME_CASE(RI_VZIP2A_VL) - NODE_NAME_CASE(RI_VUNZIP2A_VL) - NODE_NAME_CASE(RI_VUNZIP2B_VL) NODE_NAME_CASE(READ_CSR) NODE_NAME_CASE(WRITE_CSR) NODE_NAME_CASE(SWAP_CSR) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index cd0171a5b4dd0..57f9c00538f88 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -407,10 +407,8 @@ enum NodeType : unsigned { RI_VZIPEVEN_VL, RI_VZIPODD_VL, 
RI_VZIP2A_VL, - RI_VUNZIP2A_VL, - RI_VUNZIP2B_VL, - LAST_VL_VECTOR_OP = RI_VUNZIP2B_VL, + LAST_VL_VECTOR_OP = RI_VZIP2A_VL, // Read VLENB CSR READ_VLENB, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td index 395fd917bfe42..bb93fea656051 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td @@ -71,8 +71,6 @@ defm RI_VUNZIP2B_V : VALU_IV_V<"ri.vunzip2b", 0b011000>; def ri_vzipeven_vl : SDNode<"RISCVISD::RI_VZIPEVEN_VL", SDT_RISCVIntBinOp_VL>; def ri_vzipodd_vl : SDNode<"RISCVISD::RI_VZIPODD_VL", SDT_RISCVIntBinOp_VL>; def ri_vzip2a_vl : SDNode<"RISCVISD::RI_VZIP2A_VL", SDT_RISCVIntBinOp_VL>; -def ri_vunzip2a_vl : SDNode<"RISCVISD::RI_VUNZIP2A_VL", SDT_RISCVIntBinOp_VL>; -def ri_vunzip2b_vl : SDNode<"RISCVISD::RI_VUNZIP2B_VL", SDT_RISCVIntBinOp_VL>; multiclass RIVPseudoVALU_VV { foreach m = MxList in { @@ -86,8 +84,6 @@ let Predicates = [HasVendorXRivosVizip], defm PseudoRI_VZIPEVEN : RIVPseudoVALU_VV; defm PseudoRI_VZIPODD : RIVPseudoVALU_VV; defm PseudoRI_VZIP2A : RIVPseudoVALU_VV; -defm PseudoRI_VUNZIP2A : RIVPseudoVALU_VV; -defm PseudoRI_VUNZIP2B : RIVPseudoVALU_VV; } multiclass RIVPatBinaryVL_VV; defm : RIVPatBinaryVL_VV; defm : RIVPatBinaryVL_VV; -defm : RIVPatBinaryVL_VV; -defm : RIVPatBinaryVL_VV; //===----------------------------------------------------------------------===// // XRivosVisni @@ -127,5 +121,3 @@ def RI_VEXTRACT : CustomRivosXVI<0b010111, OPMVV, (outs GPR:$rd), (ins VR:$vs2, uimm5:$imm), "ri.vextract.x.v", "$rd, $vs2, $imm">; } - - diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll index 2165c6025f7e7..9279e0a4d3a6c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll @@ -1,13 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvl256b \ ; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \ -; RUN: | FileCheck %s --check-prefixes=CHECK,V,V-NOZIP +; RUN: | FileCheck %s --check-prefixes=CHECK,V ; RUN: llc < %s -mtriple=riscv64 -mattr=+f,+zve32f,+zvfh,+zvl256b \ ; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \ ; RUN: | FileCheck %s --check-prefixes=CHECK,ZVE32F -; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvl256b,+experimental-xrivosvizip \ -; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \ -; RUN: | FileCheck %s --check-prefixes=CHECK,V,ZIP define void @vnsrl_0_i8(ptr %in, ptr %out) { ; CHECK-LABEL: vnsrl_0_i8: @@ -250,15 +247,15 @@ entry: } define void @vnsrl_0_i64(ptr %in, ptr %out) { -; V-NOZIP-LABEL: vnsrl_0_i64: -; V-NOZIP: # %bb.0: # %entry -; V-NOZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; V-NOZIP-NEXT: vle64.v v8, (a0) -; V-NOZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; V-NOZIP-NEXT: vslidedown.vi v9, v8, 2 -; V-NOZIP-NEXT: vslideup.vi v8, v9, 1 -; V-NOZIP-NEXT: vse64.v v8, (a1) -; V-NOZIP-NEXT: ret +; V-LABEL: vnsrl_0_i64: +; V: # %bb.0: # %entry +; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; V-NEXT: vle64.v v8, (a0) +; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; V-NEXT: vslidedown.vi v9, v8, 2 +; V-NEXT: vslideup.vi v8, v9, 1 +; V-NEXT: vse64.v v8, (a1) +; V-NEXT: ret ; ; ZVE32F-LABEL: vnsrl_0_i64: ; ZVE32F: # %bb.0: # %entry @@ -267,18 +264,6 @@ define void @vnsrl_0_i64(ptr %in, ptr %out) { ; ZVE32F-NEXT: sd a2, 
0(a1) ; ZVE32F-NEXT: sd a0, 8(a1) ; ZVE32F-NEXT: ret -; -; ZIP-LABEL: vnsrl_0_i64: -; ZIP: # %bb.0: # %entry -; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; ZIP-NEXT: vle64.v v8, (a0) -; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; ZIP-NEXT: ri.vunzip2a.vv v10, v8, v9 -; ZIP-NEXT: vslidedown.vi v8, v8, 2 -; ZIP-NEXT: ri.vunzip2a.vv v11, v8, v9 -; ZIP-NEXT: vslideup.vi v10, v11, 1 -; ZIP-NEXT: vse64.v v10, (a1) -; ZIP-NEXT: ret entry: %0 = load <4 x i64>, ptr %in, align 8 %shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> @@ -287,16 +272,16 @@ entry: } define void @vnsrl_64_i64(ptr %in, ptr %out) { -; V-NOZIP-LABEL: vnsrl_64_i64: -; V-NOZIP: # %bb.0: # %entry -; V-NOZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; V-NOZIP-NEXT: vle64.v v8, (a0) -; V-NOZIP-NEXT: vmv.v.i v0, 1 -; V-NOZIP-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; V-NOZIP-NEXT: vslidedown.vi v9, v8, 2 -; V-NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t -; V-NOZIP-NEXT: vse64.v v9, (a1) -; V-NOZIP-NEXT: ret +; V-LABEL: vnsrl_64_i64: +; V: # %bb.0: # %entry +; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; V-NEXT: vle64.v v8, (a0) +; V-NEXT: vmv.v.i v0, 1 +; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; V-NEXT: vslidedown.vi v9, v8, 2 +; V-NEXT: vslidedown.vi v9, v8, 1, v0.t +; V-NEXT: vse64.v v9, (a1) +; V-NEXT: ret ; ; ZVE32F-LABEL: vnsrl_64_i64: ; ZVE32F: # %bb.0: # %entry @@ -305,18 +290,6 @@ define void @vnsrl_64_i64(ptr %in, ptr %out) { ; ZVE32F-NEXT: sd a2, 0(a1) ; ZVE32F-NEXT: sd a0, 8(a1) ; ZVE32F-NEXT: ret -; -; ZIP-LABEL: vnsrl_64_i64: -; ZIP: # %bb.0: # %entry -; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; ZIP-NEXT: vle64.v v8, (a0) -; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; ZIP-NEXT: ri.vunzip2b.vv v10, v8, v9 -; ZIP-NEXT: vslidedown.vi v8, v8, 2 -; ZIP-NEXT: ri.vunzip2b.vv v11, v8, v9 -; ZIP-NEXT: vslideup.vi v10, v11, 1 -; ZIP-NEXT: vse64.v v10, (a1) -; ZIP-NEXT: ret entry: %0 = load <4 x i64>, ptr %in, align 8 %shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> @@ -350,16 +323,16 @@ entry: } define void @vnsrl_64_double(ptr %in, ptr %out) { -; V-NOZIP-LABEL: vnsrl_64_double: -; V-NOZIP: # %bb.0: # %entry -; V-NOZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; V-NOZIP-NEXT: vle64.v v8, (a0) -; V-NOZIP-NEXT: vmv.v.i v0, 1 -; V-NOZIP-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; V-NOZIP-NEXT: vslidedown.vi v9, v8, 2 -; V-NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t -; V-NOZIP-NEXT: vse64.v v9, (a1) -; V-NOZIP-NEXT: ret +; V-LABEL: vnsrl_64_double: +; V: # %bb.0: # %entry +; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; V-NEXT: vle64.v v8, (a0) +; V-NEXT: vmv.v.i v0, 1 +; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; V-NEXT: vslidedown.vi v9, v8, 2 +; V-NEXT: vslidedown.vi v9, v8, 1, v0.t +; V-NEXT: vse64.v v9, (a1) +; V-NEXT: ret ; ; ZVE32F-LABEL: vnsrl_64_double: ; ZVE32F: # %bb.0: # %entry @@ -368,16 +341,6 @@ define void @vnsrl_64_double(ptr %in, ptr %out) { ; ZVE32F-NEXT: sd a2, 0(a1) ; ZVE32F-NEXT: sd a0, 8(a1) ; ZVE32F-NEXT: ret -; -; ZIP-LABEL: vnsrl_64_double: -; ZIP: # %bb.0: # %entry -; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; ZIP-NEXT: vle64.v v8, (a0) -; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; ZIP-NEXT: vslidedown.vi v9, v8, 2 -; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9 -; ZIP-NEXT: vse64.v v10, (a1) -; ZIP-NEXT: ret entry: %0 = load <4 x double>, ptr %in, align 8 %shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> @@ -839,15 +802,15 @@ entry: } define void @vnsrl_32_i32_two_source(ptr %in0, ptr %in1, ptr %out) { -; V-NOZIP-LABEL: 
vnsrl_32_i32_two_source: -; V-NOZIP: # %bb.0: # %entry -; V-NOZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; V-NOZIP-NEXT: vle32.v v8, (a0) -; V-NOZIP-NEXT: vle32.v v9, (a1) -; V-NOZIP-NEXT: vmv.v.i v0, 1 -; V-NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t -; V-NOZIP-NEXT: vse32.v v9, (a2) -; V-NOZIP-NEXT: ret +; V-LABEL: vnsrl_32_i32_two_source: +; V: # %bb.0: # %entry +; V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; V-NEXT: vle32.v v8, (a0) +; V-NEXT: vle32.v v9, (a1) +; V-NEXT: vmv.v.i v0, 1 +; V-NEXT: vslidedown.vi v9, v8, 1, v0.t +; V-NEXT: vse32.v v9, (a2) +; V-NEXT: ret ; ; ZVE32F-LABEL: vnsrl_32_i32_two_source: ; ZVE32F: # %bb.0: # %entry @@ -858,15 +821,6 @@ define void @vnsrl_32_i32_two_source(ptr %in0, ptr %in1, ptr %out) { ; ZVE32F-NEXT: vslidedown.vi v9, v8, 1, v0.t ; ZVE32F-NEXT: vse32.v v9, (a2) ; ZVE32F-NEXT: ret -; -; ZIP-LABEL: vnsrl_32_i32_two_source: -; ZIP: # %bb.0: # %entry -; ZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; ZIP-NEXT: vle32.v v8, (a0) -; ZIP-NEXT: vle32.v v9, (a1) -; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9 -; ZIP-NEXT: vse32.v v10, (a2) -; ZIP-NEXT: ret entry: %0 = load <2 x i32>, ptr %in0, align 4 %1 = load <2 x i32>, ptr %in1, align 4 @@ -902,15 +856,15 @@ entry: } define void @vnsrl_32_float_two_source(ptr %in0, ptr %in1, ptr %out) { -; V-NOZIP-LABEL: vnsrl_32_float_two_source: -; V-NOZIP: # %bb.0: # %entry -; V-NOZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; V-NOZIP-NEXT: vle32.v v8, (a0) -; V-NOZIP-NEXT: vle32.v v9, (a1) -; V-NOZIP-NEXT: vmv.v.i v0, 1 -; V-NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t -; V-NOZIP-NEXT: vse32.v v9, (a2) -; V-NOZIP-NEXT: ret +; V-LABEL: vnsrl_32_float_two_source: +; V: # %bb.0: # %entry +; V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; V-NEXT: vle32.v v8, (a0) +; V-NEXT: vle32.v v9, (a1) +; V-NEXT: vmv.v.i v0, 1 +; V-NEXT: vslidedown.vi v9, v8, 1, v0.t +; V-NEXT: vse32.v v9, (a2) +; V-NEXT: ret ; ; ZVE32F-LABEL: vnsrl_32_float_two_source: ; ZVE32F: # %bb.0: # %entry @@ -921,15 +875,6 @@ define void @vnsrl_32_float_two_source(ptr %in0, ptr %in1, ptr %out) { ; ZVE32F-NEXT: vslidedown.vi v9, v8, 1, v0.t ; ZVE32F-NEXT: vse32.v v9, (a2) ; ZVE32F-NEXT: ret -; -; ZIP-LABEL: vnsrl_32_float_two_source: -; ZIP: # %bb.0: # %entry -; ZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; ZIP-NEXT: vle32.v v8, (a0) -; ZIP-NEXT: vle32.v v9, (a1) -; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9 -; ZIP-NEXT: vse32.v v10, (a2) -; ZIP-NEXT: ret entry: %0 = load <2 x float>, ptr %in0, align 4 %1 = load <2 x float>, ptr %in1, align 4 @@ -939,14 +884,14 @@ entry: } define void @vnsrl_0_i64_two_source(ptr %in0, ptr %in1, ptr %out) { -; V-NOZIP-LABEL: vnsrl_0_i64_two_source: -; V-NOZIP: # %bb.0: # %entry -; V-NOZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; V-NOZIP-NEXT: vle64.v v8, (a0) -; V-NOZIP-NEXT: vle64.v v9, (a1) -; V-NOZIP-NEXT: vslideup.vi v8, v9, 1 -; V-NOZIP-NEXT: vse64.v v8, (a2) -; V-NOZIP-NEXT: ret +; V-LABEL: vnsrl_0_i64_two_source: +; V: # %bb.0: # %entry +; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; V-NEXT: vle64.v v8, (a0) +; V-NEXT: vle64.v v9, (a1) +; V-NEXT: vslideup.vi v8, v9, 1 +; V-NEXT: vse64.v v8, (a2) +; V-NEXT: ret ; ; ZVE32F-LABEL: vnsrl_0_i64_two_source: ; ZVE32F: # %bb.0: # %entry @@ -956,17 +901,6 @@ define void @vnsrl_0_i64_two_source(ptr %in0, ptr %in1, ptr %out) { ; ZVE32F-NEXT: vslideup.vi v8, v9, 2 ; ZVE32F-NEXT: vse32.v v8, (a2) ; ZVE32F-NEXT: ret -; -; ZIP-LABEL: vnsrl_0_i64_two_source: -; ZIP: # %bb.0: # %entry -; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; ZIP-NEXT: vle64.v v8, (a0) -; ZIP-NEXT: vle64.v v9, (a1) -; 
ZIP-NEXT: ri.vunzip2a.vv v11, v8, v10 -; ZIP-NEXT: ri.vunzip2a.vv v10, v9, v8 -; ZIP-NEXT: vslideup.vi v11, v10, 1 -; ZIP-NEXT: vse64.v v11, (a2) -; ZIP-NEXT: ret entry: %0 = load <2 x i64>, ptr %in0, align 8 %1 = load <2 x i64>, ptr %in1, align 8 @@ -976,15 +910,15 @@ entry: } define void @vnsrl_64_i64_two_source(ptr %in0, ptr %in1, ptr %out) { -; V-NOZIP-LABEL: vnsrl_64_i64_two_source: -; V-NOZIP: # %bb.0: # %entry -; V-NOZIP-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; V-NOZIP-NEXT: vle64.v v8, (a0) -; V-NOZIP-NEXT: vle64.v v9, (a1) -; V-NOZIP-NEXT: vmv.v.i v0, 1 -; V-NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t -; V-NOZIP-NEXT: vse64.v v9, (a2) -; V-NOZIP-NEXT: ret +; V-LABEL: vnsrl_64_i64_two_source: +; V: # %bb.0: # %entry +; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; V-NEXT: vle64.v v8, (a0) +; V-NEXT: vle64.v v9, (a1) +; V-NEXT: vmv.v.i v0, 1 +; V-NEXT: vslidedown.vi v9, v8, 1, v0.t +; V-NEXT: vse64.v v9, (a2) +; V-NEXT: ret ; ; ZVE32F-LABEL: vnsrl_64_i64_two_source: ; ZVE32F: # %bb.0: # %entry @@ -995,17 +929,6 @@ define void @vnsrl_64_i64_two_source(ptr %in0, ptr %in1, ptr %out) { ; ZVE32F-NEXT: vslidedown.vi v9, v8, 2, v0.t ; ZVE32F-NEXT: vse32.v v9, (a2) ; ZVE32F-NEXT: ret -; -; ZIP-LABEL: vnsrl_64_i64_two_source: -; ZIP: # %bb.0: # %entry -; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; ZIP-NEXT: vle64.v v8, (a0) -; ZIP-NEXT: vle64.v v9, (a1) -; ZIP-NEXT: ri.vunzip2b.vv v11, v8, v10 -; ZIP-NEXT: ri.vunzip2b.vv v10, v9, v8 -; ZIP-NEXT: vslideup.vi v11, v10, 1 -; ZIP-NEXT: vse64.v v11, (a2) -; ZIP-NEXT: ret entry: %0 = load <2 x i64>, ptr %in0, align 8 %1 = load <2 x i64>, ptr %in1, align 8 @@ -1040,15 +963,15 @@ entry: } define void @vnsrl_64_double_two_source(ptr %in0, ptr %in1, ptr %out) { -; V-NOZIP-LABEL: vnsrl_64_double_two_source: -; V-NOZIP: # %bb.0: # %entry -; V-NOZIP-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; V-NOZIP-NEXT: vle64.v v8, (a0) -; V-NOZIP-NEXT: vle64.v v9, (a1) -; V-NOZIP-NEXT: vmv.v.i v0, 1 -; V-NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t -; V-NOZIP-NEXT: vse64.v v9, (a2) -; V-NOZIP-NEXT: ret +; V-LABEL: vnsrl_64_double_two_source: +; V: # %bb.0: # %entry +; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; V-NEXT: vle64.v v8, (a0) +; V-NEXT: vle64.v v9, (a1) +; V-NEXT: vmv.v.i v0, 1 +; V-NEXT: vslidedown.vi v9, v8, 1, v0.t +; V-NEXT: vse64.v v9, (a2) +; V-NEXT: ret ; ; ZVE32F-LABEL: vnsrl_64_double_two_source: ; ZVE32F: # %bb.0: # %entry @@ -1057,15 +980,6 @@ define void @vnsrl_64_double_two_source(ptr %in0, ptr %in1, ptr %out) { ; ZVE32F-NEXT: sd a0, 0(a2) ; ZVE32F-NEXT: sd a1, 8(a2) ; ZVE32F-NEXT: ret -; -; ZIP-LABEL: vnsrl_64_double_two_source: -; ZIP: # %bb.0: # %entry -; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; ZIP-NEXT: vle64.v v8, (a0) -; ZIP-NEXT: vle64.v v9, (a1) -; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9 -; ZIP-NEXT: vse64.v v10, (a2) -; ZIP-NEXT: ret entry: %0 = load <2 x double>, ptr %in0, align 8 %1 = load <2 x double>, ptr %in1, align 8 From c58c2a3325b4865b8a61b940be5d84d7b30ff7cb Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 28 Mar 2025 10:46:03 -0700 Subject: [PATCH 05/12] Address first round review comments --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index e3289c854fdce..191a339c1b718 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4613,7 +4613,7 @@ static bool isElementRotate(std::array, 2> &SrcInfo, 
SrcInfo[1].second - SrcInfo[0].second == (int)NumElts; } -static bool isAlternating(std::array, 2> &SrcInfo, +static bool isAlternating(const std::array, 2> &SrcInfo, ArrayRef Mask, bool &Polarity) { int NumElts = Mask.size(); bool NonUndefFound = false; @@ -4621,7 +4621,7 @@ static bool isAlternating(std::array, 2> &SrcInfo, int M = Mask[i]; if (M < 0) continue; - int Src = M >= (int)NumElts; + int Src = M >= NumElts; int Diff = (int)i - (M % NumElts); bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second; if (!NonUndefFound) { @@ -4629,21 +4629,20 @@ static bool isAlternating(std::array, 2> &SrcInfo, Polarity = (C == i % 2); continue; } - if ((Polarity && C != i % 2) || (!Polarity && C == i % 2)) + if (Polarity != (C == i % 2)) return false; } return true; } -static bool isZipEven(std::array, 2> &SrcInfo, +static bool isZipEven(const std::array, 2> &SrcInfo, ArrayRef Mask) { bool Polarity; return SrcInfo[0].second == 0 && SrcInfo[1].second == 1 && isAlternating(SrcInfo, Mask, Polarity) && Polarity; - ; } -static bool isZipOdd(std::array, 2> &SrcInfo, +static bool isZipOdd(const std::array, 2> &SrcInfo, ArrayRef Mask) { bool Polarity; return SrcInfo[0].second == 0 && SrcInfo[1].second == -1 && @@ -5623,7 +5622,6 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, } } - if (SDValue V = lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG)) return V; From f28ad8d8410b9f413441c9de8a772f80f3c7b1b3 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 28 Mar 2025 10:50:58 -0700 Subject: [PATCH 06/12] Review comment round 2 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 22 ++++++++------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 191a339c1b718..795e8108bd080 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4614,9 +4614,8 @@ static bool isElementRotate(std::array, 2> &SrcInfo, } static bool isAlternating(const std::array, 2> &SrcInfo, - ArrayRef Mask, bool &Polarity) { + const ArrayRef Mask, bool RequiredPolarity) { int NumElts = Mask.size(); - bool NonUndefFound = false; for (unsigned i = 0; i != Mask.size(); ++i) { int M = Mask[i]; if (M < 0) @@ -4624,29 +4623,24 @@ static bool isAlternating(const std::array, 2> &SrcInfo, int Src = M >= NumElts; int Diff = (int)i - (M % NumElts); bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second; - if (!NonUndefFound) { - NonUndefFound = true; - Polarity = (C == i % 2); - continue; - } - if (Polarity != (C == i % 2)) + assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) && + "Must match exactly one of the two slides"); + if (RequiredPolarity != (C == i % 2)) return false; } return true; } static bool isZipEven(const std::array, 2> &SrcInfo, - ArrayRef Mask) { - bool Polarity; + const ArrayRef Mask) { return SrcInfo[0].second == 0 && SrcInfo[1].second == 1 && - isAlternating(SrcInfo, Mask, Polarity) && Polarity; + isAlternating(SrcInfo, Mask, true); } static bool isZipOdd(const std::array, 2> &SrcInfo, - ArrayRef Mask) { - bool Polarity; + const ArrayRef Mask) { return SrcInfo[0].second == 0 && SrcInfo[1].second == -1 && - isAlternating(SrcInfo, Mask, Polarity) && !Polarity; + isAlternating(SrcInfo, Mask, false); } // Lower a deinterleave shuffle to SRL and TRUNC. 
Factor must be From e934416f81f2602f012211ca00ad80e98a522de7 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 28 Mar 2025 10:53:48 -0700 Subject: [PATCH 07/12] Review comment #3 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 795e8108bd080..c4dfc24100a34 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4902,9 +4902,9 @@ static bool isSpreadMask(ArrayRef Mask, unsigned Factor, unsigned &Index) { return true; } -static SDValue lowerVIZIP(unsigned Opc, SDValue Op0, SDValue Op1, - const SDLoc &DL, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { +static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1, + const SDLoc &DL, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc || RISCVISD::RI_VZIP2A_VL == Opc); assert(Op0.getSimpleValueType() == Op1.getSimpleValueType()); @@ -5663,8 +5663,7 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, EvenV, DAG.getVectorIdxConstant(0, DL)); OddV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), OddV, DAG.getVectorIdxConstant(0, DL)); - return lowerVIZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, - Subtarget); + return lowerVZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget); } return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget); } @@ -5720,14 +5719,13 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, if (Subtarget.hasVendorXRivosVizip() && isZipEven(SrcInfo, Mask)) { SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2; SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2; - return lowerVIZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG, - Subtarget); + return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG, + Subtarget); } if (Subtarget.hasVendorXRivosVizip() && isZipOdd(SrcInfo, Mask)) { SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2; SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2; - return lowerVIZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG, - Subtarget); + return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG, Subtarget); } // Build the mask. Note that vslideup unconditionally preserves elements From 2267602a95cc69c9c636ba04f54c3220a42f6071 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 28 Mar 2025 11:29:35 -0700 Subject: [PATCH 08/12] Add descriptive comment --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index b6841e7b352eb..5893962f0a597 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4641,12 +4641,24 @@ static bool isAlternating(const std::array, 2> &SrcInfo, return true; } +/// Given a shuffle which can be represented as a pair of two slides, +/// see if it is a zipeven idiom. Zipeven is: +/// vs2: a0 a1 a2 a3 +/// vs1: b0 b1 b2 b3 +/// vd: a0 b0 a2 b2 static bool isZipEven(const std::array, 2> &SrcInfo, const ArrayRef Mask) { return SrcInfo[0].second == 0 && SrcInfo[1].second == 1 && isAlternating(SrcInfo, Mask, true); } +/// Given a shuffle which can be represented as a pair of two slides, +/// see if it is a zipodd idiom. 
Zipodd is: +/// vs2: a0 a1 a2 a3 +/// vs1: b0 b1 b2 b3 +/// vd: a1 b1 a3 b3 +/// Note that the operand order is swapped due to the way we canonicalize +/// the slides, so SrCInfo[0] is vs1, and SrcInfo[1] is vs2. static bool isZipOdd(const std::array, 2> &SrcInfo, const ArrayRef Mask) { return SrcInfo[0].second == 0 && SrcInfo[1].second == -1 && From 544dd90cf880af60198875637378f53ed8f5c3ec Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 28 Mar 2025 11:43:08 -0700 Subject: [PATCH 09/12] Fixup test missed in merge --- .../fixed-vectors-shuffle-int-interleave.ll | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll index da9a5a1fc1065..917613d5c786f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll @@ -606,9 +606,9 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; ZIP-NEXT: mul a0, a0, a1 ; ZIP-NEXT: add a0, sp, a0 ; ZIP-NEXT: addi a0, a0, 16 -; ZIP-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZIP-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; ZIP-NEXT: addi a0, sp, 16 -; ZIP-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZIP-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; ZIP-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; ZIP-NEXT: vslidedown.vi v24, v8, 16 ; ZIP-NEXT: li a0, 32 @@ -619,14 +619,14 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; ZIP-NEXT: mul a1, a1, a2 ; ZIP-NEXT: add a1, sp, a1 ; ZIP-NEXT: addi a1, a1, 16 -; ZIP-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZIP-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload ; ZIP-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; ZIP-NEXT: vslidedown.vi v24, v24, 16 ; ZIP-NEXT: csrr a1, vlenb ; ZIP-NEXT: slli a1, a1, 4 ; ZIP-NEXT: add a1, sp, a1 ; ZIP-NEXT: addi a1, a1, 16 -; ZIP-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; ZIP-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill ; ZIP-NEXT: lui a1, 699051 ; ZIP-NEXT: addi a1, a1, -1366 ; ZIP-NEXT: vmv.s.x v0, a1 @@ -634,17 +634,17 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; ZIP-NEXT: slli a1, a1, 3 ; ZIP-NEXT: add a1, sp, a1 ; ZIP-NEXT: addi a1, a1, 16 -; ZIP-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; ZIP-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill ; ZIP-NEXT: csrr a1, vlenb ; ZIP-NEXT: slli a1, a1, 4 ; ZIP-NEXT: add a1, sp, a1 ; ZIP-NEXT: addi a1, a1, 16 -; ZIP-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZIP-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload ; ZIP-NEXT: csrr a1, vlenb ; ZIP-NEXT: slli a1, a1, 3 ; ZIP-NEXT: add a1, sp, a1 ; ZIP-NEXT: addi a1, a1, 16 -; ZIP-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZIP-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload ; ZIP-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; ZIP-NEXT: ri.vzip2a.vv v16, v8, v24, v0.t ; ZIP-NEXT: csrr a0, vlenb @@ -652,9 +652,9 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; ZIP-NEXT: mul a0, a0, a1 ; ZIP-NEXT: add a0, sp, a0 ; ZIP-NEXT: addi a0, a0, 16 -; ZIP-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZIP-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload ; ZIP-NEXT: addi a0, sp, 16 -; ZIP-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZIP-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; ZIP-NEXT: 
ri.vzip2a.vv v0, v8, v24 ; ZIP-NEXT: vmv.v.v v8, v0 ; ZIP-NEXT: csrr a0, vlenb From af9dbc5096de51606aa24882723d8f80847b9802 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 28 Mar 2025 12:07:27 -0700 Subject: [PATCH 10/12] Another round of review comment --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +- llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 5893962f0a597..6a5f615a7f6c2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4626,7 +4626,7 @@ static bool isElementRotate(const std::array, 2> &SrcInfo, static bool isAlternating(const std::array, 2> &SrcInfo, const ArrayRef Mask, bool RequiredPolarity) { int NumElts = Mask.size(); - for (unsigned i = 0; i != Mask.size(); ++i) { + for (unsigned i = 0; i != NumElts; ++i) { int M = Mask[i]; if (M < 0) continue; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td index bb93fea656051..3fe50503f937b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td @@ -73,10 +73,8 @@ def ri_vzipodd_vl : SDNode<"RISCVISD::RI_VZIPODD_VL", SDT_RISCVIntBinOp_VL>; def ri_vzip2a_vl : SDNode<"RISCVISD::RI_VZIP2A_VL", SDT_RISCVIntBinOp_VL>; multiclass RIVPseudoVALU_VV { - foreach m = MxList in { - defvar mx = m.MX; + foreach m = MxList in defm "" : VPseudoBinaryV_VV; - } } let Predicates = [HasVendorXRivosVizip], @@ -88,7 +86,7 @@ defm PseudoRI_VZIP2A : RIVPseudoVALU_VV; multiclass RIVPatBinaryVL_VV vtilist = AllIntegerVectors, - bit isSEWAware = 0> { + bit isSEWAware = false> { foreach vti = vtilist in let Predicates = GetVTypePredicates.Predicates in def : VPatBinaryVL_V Date: Fri, 28 Mar 2025 14:22:33 -0700 Subject: [PATCH 11/12] Address review comment --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 6a5f615a7f6c2..1b673b85281ba 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4633,7 +4633,7 @@ static bool isAlternating(const std::array, 2> &SrcInfo, int Src = M >= NumElts; int Diff = (int)i - (M % NumElts); bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second; - assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) && + assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) && "Must match exactly one of the two slides"); if (RequiredPolarity != (C == i % 2)) return false; From d4457b4ba77617e87a6314d73a35f26fc494f3e1 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Sat, 29 Mar 2025 15:17:34 -0700 Subject: [PATCH 12/12] Review comments --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 1b673b85281ba..a8c83113854c9 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4624,7 +4624,7 @@ static bool isElementRotate(const std::array, 2> &SrcInfo, } static bool isAlternating(const std::array, 2> &SrcInfo, - const ArrayRef Mask, bool RequiredPolarity) { + ArrayRef Mask, bool RequiredPolarity) { int NumElts = Mask.size(); for (unsigned i = 0; i != 
NumElts; ++i) { int M = Mask[i]; @@ -4647,7 +4647,7 @@ static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo, /// vs1: b0 b1 b2 b3 /// vd: a0 b0 a2 b2 static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo, - const ArrayRef<int> Mask) { + ArrayRef<int> Mask) { return SrcInfo[0].second == 0 && SrcInfo[1].second == 1 && isAlternating(SrcInfo, Mask, true); } @@ -4660,7 +4660,7 @@ static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo, /// Note that the operand order is swapped due to the way we canonicalize /// the slides, so SrCInfo[0] is vs1, and SrcInfo[1] is vs2. static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo, - const ArrayRef<int> Mask) { + ArrayRef<int> Mask) { return SrcInfo[0].second == 0 && SrcInfo[1].second == -1 && isAlternating(SrcInfo, Mask, false); }
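
As a reading aid for the zipeven/zipodd matching refined over the patches above, here is a minimal standalone C++ sketch of how the final form of the matchers classifies a two-slide shuffle. It reuses the names isAlternating/isZipEven/isZipOdd from the series, but SrcInfoT, MaskT, and the main driver are illustrative scaffolding rather than the real LLVM types (ArrayRef<int>, the lowering's SrcInfo array), and the concrete masks are just example values consistent with the vs2/vs1/vd diagrams in the patch comments; treat it as a sketch of the matching logic, not the implementation.

#include <array>
#include <cassert>
#include <cstdio>
#include <utility>
#include <vector>

// Simplified stand-ins for the LLVM types used by the real matchers.
using SrcInfoT = std::array<std::pair<int, int>, 2>;
using MaskT = std::vector<int>;

// Mirrors the final isAlternating(): SrcInfo[k] = {source, slide}, and a mask
// element matching the SrcInfo[1] slide must occur exactly on the even or odd
// lanes, as selected by RequiredPolarity.
static bool isAlternating(const SrcInfoT &SrcInfo, const MaskT &Mask,
                          bool RequiredPolarity) {
  int NumElts = (int)Mask.size();
  for (int i = 0; i != NumElts; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue;                    // undef lanes are unconstrained
    int Src = M >= NumElts;        // 0 = first source, 1 = second source
    int Diff = i - (M % NumElts);  // slide amount implied by this lane
    bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
    if (RequiredPolarity != (C == i % 2))
      return false;
  }
  return true;
}

// zipeven: vd = a0 b0 a2 b2 (slides of 0 and +1, second slide on odd lanes).
static bool isZipEven(const SrcInfoT &SrcInfo, const MaskT &Mask) {
  return SrcInfo[0].second == 0 && SrcInfo[1].second == 1 &&
         isAlternating(SrcInfo, Mask, true);
}

// zipodd: vd = a1 b1 a3 b3 (slides of 0 and -1, second slide on even lanes).
static bool isZipOdd(const SrcInfoT &SrcInfo, const MaskT &Mask) {
  return SrcInfo[0].second == 0 && SrcInfo[1].second == -1 &&
         isAlternating(SrcInfo, Mask, false);
}

int main() {
  // <4 x T> zipeven of a (elements 0..3) and b (elements 4..7):
  // result a0 b0 a2 b2 is shuffle mask {0, 4, 2, 6}; the two slides are
  // {source 0, offset 0} and {source 1, offset +1}.
  assert(isZipEven({{{0, 0}, {1, 1}}}, {0, 4, 2, 6}));

  // <4 x T> zipodd: result a1 b1 a3 b3 is mask {1, 5, 3, 7}; here the
  // zero-offset slide comes from source 1, which is why the lowering swaps
  // its operands when emitting ri.vzipodd.vv.
  assert(isZipOdd({{{1, 0}, {0, -1}}}, {1, 5, 3, 7}));

  puts("zipeven and zipodd masks matched");
  return 0;
}

If either precondition on the slide offsets fails, the sketch simply reports no match, mirroring how the lowering in the series only emits ri.vzipeven.vv/ri.vzipodd.vv when isZipEven/isZipOdd hold and otherwise continues with the generic slide-based path.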