[RISCV] Initial codegen support for the XRivosVizip extension #131933
Conversation
This implements initial code generation support for the xrivosvizip extension. A couple of things to note:
* The zipeven/zipodd matchers were recently rewritten to better match upstream style, so careful review there would be appreciated.
* The zipeven/zipodd cases don't yet support type coercion. This will be done in a future patch.
* I subsetted the unzip2a/b support in a way which makes it functional, but far from optimal. A further change will reintroduce some of the complexity once it's easier to test and show incremental change.
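For reference, here is a minimal IR sketch (not taken from the patch's test files; the function names are made up) of the shuffle masks the zipeven/zipodd matching is aimed at: alternating elements drawn from the even (or odd) lanes of two sources.

```llvm
; Hypothetical example: with -mattr=+v,+experimental-xrivosvizip these masks
; are expected to match isZipEven/isZipOdd and lower to a single
; ri.vzipeven.vv / ri.vzipodd.vv instead of a masked slide sequence.
define <4 x i32> @zipeven_sketch(<4 x i32> %a, <4 x i32> %b) {
  ; Result = a[0], b[0], a[2], b[2]
  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x i32> %r
}

define <4 x i32> @zipodd_sketch(<4 x i32> %a, <4 x i32> %b) {
  ; Result = a[1], b[1], a[3], b[3]
  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x i32> %r
}
```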
@llvm/pr-subscribers-backend-risc-v

Author: Philip Reames (preames)

Changes: This implements initial code generation support for the xrivosvizip extension. A couple of things to note:
Patch is 57.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131933.diff 6 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 27a4bbce1f5fc..db9535b1a081a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4553,8 +4553,10 @@ static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
/// way through the source.
static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
int &OddSrc, const RISCVSubtarget &Subtarget) {
- // We need to be able to widen elements to the next larger integer type.
- if (VT.getScalarSizeInBits() >= Subtarget.getELen())
+ // We need to be able to widen elements to the next larger integer type or
+ // use the zip2a instruction at e64.
+ if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
+ !Subtarget.hasVendorXRivosVizip())
return false;
int Size = Mask.size();
@@ -4611,6 +4613,43 @@ static bool isElementRotate(std::array<std::pair<int, int>, 2> &SrcInfo,
SrcInfo[1].second - SrcInfo[0].second == (int)NumElts;
}
+static bool isAlternating(std::array<std::pair<int, int>, 2> &SrcInfo,
+ ArrayRef<int> Mask, bool &Polarity) {
+ int NumElts = Mask.size();
+ bool NonUndefFound = false;
+ for (unsigned i = 0; i != Mask.size(); ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+ int Src = M >= (int)NumElts;
+ int Diff = (int)i - (M % NumElts);
+ bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
+ if (!NonUndefFound) {
+ NonUndefFound = true;
+ Polarity = (C == i % 2);
+ continue;
+ }
+ if ((Polarity && C != i % 2) || (!Polarity && C == i % 2))
+ return false;
+ }
+ return true;
+}
+
+static bool isZipEven(std::array<std::pair<int, int>, 2> &SrcInfo,
+ ArrayRef<int> Mask) {
+ bool Polarity;
+ return SrcInfo[0].second == 0 && SrcInfo[1].second == 1 &&
+ isAlternating(SrcInfo, Mask, Polarity) && Polarity;
+;
+}
+
+static bool isZipOdd(std::array<std::pair<int, int>, 2> &SrcInfo,
+ ArrayRef<int> Mask) {
+ bool Polarity;
+ return SrcInfo[0].second == 0 && SrcInfo[1].second == -1 &&
+ isAlternating(SrcInfo, Mask, Polarity) && !Polarity;
+}
+
// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
// 2, 4, 8 and the integer type Factor-times larger than VT's
// element type must be a legal element type.
@@ -4870,6 +4909,36 @@ static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
return true;
}
+static SDValue lowerVIZIP(unsigned Opc, SDValue Op0, SDValue Op1,
+ const SDLoc &DL, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
+ RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc ||
+ RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc);
+ assert(Op0.getSimpleValueType() == Op1.getSimpleValueType());
+
+ MVT VT = Op0.getSimpleValueType();
+ MVT IntVT = VT.changeVectorElementTypeToInteger();
+ Op0 = DAG.getBitcast(IntVT, Op0);
+ Op1 = DAG.getBitcast(IntVT, Op1);
+
+ MVT ContainerVT = IntVT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
+ Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
+ Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
+ }
+
+ auto [Mask, VL] = getDefaultVLOps(IntVT, ContainerVT, DL, DAG, Subtarget);
+ SDValue Passthru = DAG.getUNDEF(ContainerVT);
+ SDValue Res =
+ DAG.getNode(Opc, DL, ContainerVT, Op0, Op1, Passthru, Mask, VL);
+ if (IntVT.isFixedLengthVector())
+ Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
+ Res = DAG.getBitcast(VT, Res);
+ return Res;
+}
+
// Given a vector a, b, c, d return a vector Factor times longer
// with Factor-1 undef's between elements. Ex:
// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
@@ -5384,6 +5453,7 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
SDLoc DL(Op);
MVT XLenVT = Subtarget.getXLenVT();
MVT VT = Op.getSimpleValueType();
+ EVT ElemVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
@@ -5556,6 +5626,25 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
}
}
+ // If this is an e64 deinterleave(2) (possibly with two distinct sources)
+ // match to the vunzip2a/vunzip2b.
+ unsigned Index = 0;
+ if (Subtarget.hasVendorXRivosVizip() && ElemVT == MVT::i64 &&
+ ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2, Index) &&
+ 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
+ MVT HalfVT = VT.getHalfNumVectorElementsVT();
+ unsigned Opc = Index == 0 ?
+ RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
+ V1 = lowerVIZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
+ V2 = lowerVIZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
+
+ V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
+ DAG.getVectorIdxConstant(0, DL));
+ V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2,
+ DAG.getVectorIdxConstant(0, DL));
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V1, V2);
+ }
+
if (SDValue V =
lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
return V;
@@ -5596,6 +5685,15 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
DAG.getVectorIdxConstant(OddSrc % Size, DL));
}
+ // Prefer vzip2a if available.
+ // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
+ if (Subtarget.hasVendorXRivosVizip()) {
+ EvenV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
+ EvenV, DAG.getVectorIdxConstant(0, DL));
+ OddV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
+ OddV, DAG.getVectorIdxConstant(0, DL));
+ return lowerVIZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget);
+ }
return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
}
@@ -5647,6 +5745,17 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VT, Res, DAG, Subtarget);
}
+ if (Subtarget.hasVendorXRivosVizip() && isZipEven(SrcInfo, Mask)) {
+ SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
+ SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
+ return lowerVIZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG, Subtarget);
+ }
+ if (Subtarget.hasVendorXRivosVizip() && isZipOdd(SrcInfo, Mask)) {
+ SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
+ SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
+ return lowerVIZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG, Subtarget);
+ }
+
// Build the mask. Note that vslideup unconditionally preserves elements
// below the slide amount in the destination, and thus those elements are
// undefined in the mask. If the mask ends up all true (or undef), it
@@ -6710,7 +6819,7 @@ static bool hasPassthruOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(
- RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 127 &&
+ RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 133 &&
RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
"adding target specific op should update this function");
if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
@@ -6734,12 +6843,13 @@ static bool hasMaskOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(
- RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 127 &&
+ RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 133 &&
RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
"adding target specific op should update this function");
if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
return true;
- if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
+ if (Opcode >= RISCVISD::VRGATHER_VX_VL &&
+ Opcode <= RISCVISD::LAST_VL_VECTOR_OP)
return true;
if (Opcode >= RISCVISD::STRICT_FADD_VL &&
Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
@@ -21758,6 +21868,12 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VZEXT_VL)
NODE_NAME_CASE(VCPOP_VL)
NODE_NAME_CASE(VFIRST_VL)
+ NODE_NAME_CASE(RI_VZIPEVEN_VL)
+ NODE_NAME_CASE(RI_VZIPODD_VL)
+ NODE_NAME_CASE(RI_VZIP2A_VL)
+ NODE_NAME_CASE(RI_VZIP2B_VL)
+ NODE_NAME_CASE(RI_VUNZIP2A_VL)
+ NODE_NAME_CASE(RI_VUNZIP2B_VL)
NODE_NAME_CASE(READ_CSR)
NODE_NAME_CASE(WRITE_CSR)
NODE_NAME_CASE(SWAP_CSR)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index ffbc14a29006c..b271bc68427e9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -403,7 +403,15 @@ enum NodeType : unsigned {
// vfirst.m with additional mask and VL operands.
VFIRST_VL,
- LAST_VL_VECTOR_OP = VFIRST_VL,
+ // XRivosVizip
+ RI_VZIPEVEN_VL,
+ RI_VZIPODD_VL,
+ RI_VZIP2A_VL,
+ RI_VZIP2B_VL,
+ RI_VUNZIP2A_VL,
+ RI_VUNZIP2B_VL,
+
+ LAST_VL_VECTOR_OP = RI_VUNZIP2B_VL,
// Read VLENB CSR
READ_VLENB,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td
index 78c4ed6f00412..395fd917bfe42 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td
@@ -67,6 +67,46 @@ defm RI_VUNZIP2A_V : VALU_IV_V<"ri.vunzip2a", 0b001000>;
defm RI_VUNZIP2B_V : VALU_IV_V<"ri.vunzip2b", 0b011000>;
}
+// These are modeled after the int binop VL nodes
+def ri_vzipeven_vl : SDNode<"RISCVISD::RI_VZIPEVEN_VL", SDT_RISCVIntBinOp_VL>;
+def ri_vzipodd_vl : SDNode<"RISCVISD::RI_VZIPODD_VL", SDT_RISCVIntBinOp_VL>;
+def ri_vzip2a_vl : SDNode<"RISCVISD::RI_VZIP2A_VL", SDT_RISCVIntBinOp_VL>;
+def ri_vunzip2a_vl : SDNode<"RISCVISD::RI_VUNZIP2A_VL", SDT_RISCVIntBinOp_VL>;
+def ri_vunzip2b_vl : SDNode<"RISCVISD::RI_VUNZIP2B_VL", SDT_RISCVIntBinOp_VL>;
+
+multiclass RIVPseudoVALU_VV {
+ foreach m = MxList in {
+ defvar mx = m.MX;
+ defm "" : VPseudoBinaryV_VV<m, Commutable=0>;
+ }
+}
+
+let Predicates = [HasVendorXRivosVizip],
+ Constraints = "@earlyclobber $rd, $rd = $passthru" in {
+defm PseudoRI_VZIPEVEN : RIVPseudoVALU_VV;
+defm PseudoRI_VZIPODD : RIVPseudoVALU_VV;
+defm PseudoRI_VZIP2A : RIVPseudoVALU_VV;
+defm PseudoRI_VUNZIP2A : RIVPseudoVALU_VV;
+defm PseudoRI_VUNZIP2B : RIVPseudoVALU_VV;
+}
+
+multiclass RIVPatBinaryVL_VV<SDPatternOperator vop, string instruction_name,
+ list<VTypeInfo> vtilist = AllIntegerVectors,
+ bit isSEWAware = 0> {
+ foreach vti = vtilist in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ def : VPatBinaryVL_V<vop, instruction_name, "VV",
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
+ vti.RegClass, isSEWAware>;
+}
+
+defm : RIVPatBinaryVL_VV<ri_vzipeven_vl, "PseudoRI_VZIPEVEN">;
+defm : RIVPatBinaryVL_VV<ri_vzipodd_vl, "PseudoRI_VZIPODD">;
+defm : RIVPatBinaryVL_VV<ri_vzip2a_vl, "PseudoRI_VZIP2A">;
+defm : RIVPatBinaryVL_VV<ri_vunzip2a_vl, "PseudoRI_VUNZIP2A">;
+defm : RIVPatBinaryVL_VV<ri_vunzip2b_vl, "PseudoRI_VUNZIP2B">;
+
//===----------------------------------------------------------------------===//
// XRivosVisni
//===----------------------------------------------------------------------===//
@@ -87,3 +127,5 @@ def RI_VEXTRACT : CustomRivosXVI<0b010111, OPMVV, (outs GPR:$rd),
(ins VR:$vs2, uimm5:$imm),
"ri.vextract.x.v", "$rd, $vs2, $imm">;
}
+
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
index 9279e0a4d3a6c..2165c6025f7e7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
@@ -1,10 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvl256b \
; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \
-; RUN: | FileCheck %s --check-prefixes=CHECK,V
+; RUN: | FileCheck %s --check-prefixes=CHECK,V,V-NOZIP
; RUN: llc < %s -mtriple=riscv64 -mattr=+f,+zve32f,+zvfh,+zvl256b \
; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \
; RUN: | FileCheck %s --check-prefixes=CHECK,ZVE32F
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvl256b,+experimental-xrivosvizip \
+; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \
+; RUN: | FileCheck %s --check-prefixes=CHECK,V,ZIP
define void @vnsrl_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: vnsrl_0_i8:
@@ -247,15 +250,15 @@ entry:
}
define void @vnsrl_0_i64(ptr %in, ptr %out) {
-; V-LABEL: vnsrl_0_i64:
-; V: # %bb.0: # %entry
-; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; V-NEXT: vle64.v v8, (a0)
-; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; V-NEXT: vslidedown.vi v9, v8, 2
-; V-NEXT: vslideup.vi v8, v9, 1
-; V-NEXT: vse64.v v8, (a1)
-; V-NEXT: ret
+; V-NOZIP-LABEL: vnsrl_0_i64:
+; V-NOZIP: # %bb.0: # %entry
+; V-NOZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; V-NOZIP-NEXT: vle64.v v8, (a0)
+; V-NOZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; V-NOZIP-NEXT: vslidedown.vi v9, v8, 2
+; V-NOZIP-NEXT: vslideup.vi v8, v9, 1
+; V-NOZIP-NEXT: vse64.v v8, (a1)
+; V-NOZIP-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_0_i64:
; ZVE32F: # %bb.0: # %entry
@@ -264,6 +267,18 @@ define void @vnsrl_0_i64(ptr %in, ptr %out) {
; ZVE32F-NEXT: sd a2, 0(a1)
; ZVE32F-NEXT: sd a0, 8(a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_0_i64:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZIP-NEXT: vle64.v v8, (a0)
+; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZIP-NEXT: ri.vunzip2a.vv v10, v8, v9
+; ZIP-NEXT: vslidedown.vi v8, v8, 2
+; ZIP-NEXT: ri.vunzip2a.vv v11, v8, v9
+; ZIP-NEXT: vslideup.vi v10, v11, 1
+; ZIP-NEXT: vse64.v v10, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <4 x i64>, ptr %in, align 8
%shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> <i32 0, i32 2>
@@ -272,16 +287,16 @@ entry:
}
define void @vnsrl_64_i64(ptr %in, ptr %out) {
-; V-LABEL: vnsrl_64_i64:
-; V: # %bb.0: # %entry
-; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; V-NEXT: vle64.v v8, (a0)
-; V-NEXT: vmv.v.i v0, 1
-; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; V-NEXT: vslidedown.vi v9, v8, 2
-; V-NEXT: vslidedown.vi v9, v8, 1, v0.t
-; V-NEXT: vse64.v v9, (a1)
-; V-NEXT: ret
+; V-NOZIP-LABEL: vnsrl_64_i64:
+; V-NOZIP: # %bb.0: # %entry
+; V-NOZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; V-NOZIP-NEXT: vle64.v v8, (a0)
+; V-NOZIP-NEXT: vmv.v.i v0, 1
+; V-NOZIP-NEXT: vsetivli zero, 2, e64, m1, ta, mu
+; V-NOZIP-NEXT: vslidedown.vi v9, v8, 2
+; V-NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t
+; V-NOZIP-NEXT: vse64.v v9, (a1)
+; V-NOZIP-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_64_i64:
; ZVE32F: # %bb.0: # %entry
@@ -290,6 +305,18 @@ define void @vnsrl_64_i64(ptr %in, ptr %out) {
; ZVE32F-NEXT: sd a2, 0(a1)
; ZVE32F-NEXT: sd a0, 8(a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_64_i64:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZIP-NEXT: vle64.v v8, (a0)
+; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZIP-NEXT: ri.vunzip2b.vv v10, v8, v9
+; ZIP-NEXT: vslidedown.vi v8, v8, 2
+; ZIP-NEXT: ri.vunzip2b.vv v11, v8, v9
+; ZIP-NEXT: vslideup.vi v10, v11, 1
+; ZIP-NEXT: vse64.v v10, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <4 x i64>, ptr %in, align 8
%shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> <i32 1, i32 3>
@@ -323,16 +350,16 @@ entry:
}
define void @vnsrl_64_double(ptr %in, ptr %out) {
-; V-LABEL: vnsrl_64_double:
-; V: # %bb.0: # %entry
-; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; V-NEXT: vle64.v v8, (a0)
-; V-NEXT: vmv.v.i v0, 1
-; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; V-NEXT: vslidedown.vi v9, v8, 2
-; V-NEXT: vslidedown.vi v9, v8, 1, v0.t
-; V-NEXT: vse64.v v9, (a1)
-; V-NEXT: ret
+; V-NOZIP-LABEL: vnsrl_64_double:
+; V-NOZIP: # %bb.0: # %entry
+; V-NOZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; V-NOZIP-NEXT: vle64.v v8, (a0)
+; V-NOZIP-NEXT: vmv.v.i v0, 1
+; V-NOZIP-NEXT: vsetivli zero, 2, e64, m1, ta, mu
+; V-NOZIP-NEXT: vslidedown.vi v9, v8, 2
+; V-NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t
+; V-NOZIP-NEXT: vse64.v v9, (a1)
+; V-NOZIP-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_64_double:
; ZVE32F: # %bb.0: # %entry
@@ -341,6 +368,16 @@ define void @vnsrl_64_double(ptr %in, ptr %out) {
; ZVE32F-NEXT: sd a2, 0(a1)
; ZVE32F-NEXT: sd a0, 8(a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_64_double:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZIP-NEXT: vle64.v v8, (a0)
+; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZIP-NEXT: vslidedown.vi v9, v8, 2
+; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9
+; ZIP-NEXT: vse64.v v10, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <4 x double>, ptr %in, align 8
%shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> <i32 1, i32 3>
@@ -802,15 +839,15 @@ entry:
}
define void @vnsrl_32_i32_two_source(ptr %in0, ptr %in1, ptr %out) {
-; V-LABEL: vnsrl_32_i32_two_source:
-; V: # %bb.0: # %entry
-; V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; V-NEXT: vle32.v v8, (a0)
-; V-NEXT: vle32.v v9, (a1)
-; V-NEXT: vmv.v.i v0, 1
-; V-NEXT: vslidedown.vi v9, v8, 1, v0.t
-; V-NEXT: vse32.v v9, (a2)
-; V-NEXT: ret
+; V-NOZIP-LABEL: vnsrl_32_i32_two_source:
+; V-NOZIP: # %bb.0: # %entry
+; V-NOZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; V-NOZIP-NEXT: vle32.v v8, (a0)
+; V-NOZIP-NEXT: vle32.v v9, (a1)
+; V-NOZIP-NEXT: vmv.v.i v0, 1
+; V-NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t
+; V-NOZIP-NEXT: vse32.v v9, (a2)
+; V-NOZIP-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_32_i32_two_source:
; ZVE32F: # %bb.0: # %entry
@@ -821,6 +858,15 @@ define void @vnsrl_32_i32_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZVE32F-NEXT: vslidedown.vi v9, v8, 1, v0.t
; ZVE32F-NEXT: vse32.v v9, (a2)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_32_i32_two_source:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; ZIP-NEXT: vle32.v v8, (a0)
+; ZIP-NEXT: vle32.v v9, (a1)
+; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9
+; ZIP-NEXT: vse32.v v10, (a2)
+; ZIP-NEXT: ret
entry:
%0 = load <2 x i32>, ptr %in0, align 4
%1 = load <2 x i32>, ptr %in1, align 4
@@ -856,15 +902,15 @@ entry:
}
define void @vnsrl_32_float_two_source(ptr %in0, ptr %in1, ptr %out) {
-; V-LABEL: vnsrl_32_float_two_source:
-; V: # %bb.0: # %entry
-; V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; V-NEXT: vle32.v v8, (a0)
-; V-NEXT: vle32.v v9, (a1)
-; V-NEXT: vmv.v.i v0, 1
-; V-NEXT: vslidedown.vi v9, v8, 1, v0.t
-; V-NEXT: vse32.v v9, (a2)
-; V-NEXT: ret
+; V-NOZIP-LABEL: vnsrl_32_float_two_source:
+; V-NOZIP: # %bb.0: # %entry
+; V-NOZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; V-NOZIP-NEXT: vle32.v v8, (a0)
+; V-NOZIP-NEXT: vle32.v v9, (a1)
+; V-NOZIP-NEXT: vmv.v.i v0, 1
+; V-NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t
+; V-NOZIP-NEXT: vse32.v v9, (a2)
+; V-NOZIP-NEXT: ret
;
; ZVE32F-LABEL: vnsrl_32_float_two_source:
; ZVE32F: # %bb.0: # %entry
@@ -875,6 +921,15 @@ define void @vnsrl_32_float_two_s...
[truncated]
✅ With the latest revision this PR passed the C/C++ code formatter.
mshockwave left a comment:
We probably can use this for llvm.vector.(de)interleave2 and other power-of-two factors as well. But that can be a follow-up patch.
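For context, a sketch of the intrinsic form being referred to (hypothetical follow-up usage, not covered by this patch):

```llvm
; Hypothetical follow-up target: a later patch could lower these intrinsics
; directly to ri.vunzip2a/ri.vunzip2b (and ri.vzip2a/ri.vzip2b for the
; interleave2 direction) rather than the generic expansion.
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64>)

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @deinterleave2_sketch(<vscale x 4 x i64> %v) {
  ; Splits %v into its even-indexed and odd-indexed elements.
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %v)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}
```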
mshockwave left a comment:
LGTM
topperc left a comment:
LGTM
LLVM Buildbot has detected a new failure on one of the builders. Full details are available at: https://lab.llvm.org/buildbot/#/builders/168/builds/10306

Here is the relevant piece of the build log for reference:
This implements initial code generation support for a subset of the xrivosvizip extension. Specifically, this adds support for vzipeven, vzipodd, and vzip2a, but not vzip2b, vunzip2a, or vunzip2b. The others will follow in separate patches.
One review note: The zipeven/zipodd matchers were recently rewritten to better match upstream style, so careful review there would be appreciated. The matchers don't yet support type coercion to wider types. This will be done in a future patch.
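As a concrete illustration of the vzip2a path (a sketch, not one of the patch's tests; the function name is hypothetical), a full two-source interleave is the kind of shuffle this lowering targets:

```llvm
; Hypothetical example: with -mattr=+v,+experimental-xrivosvizip this
; interleave mask should take the isInterleaveShuffle path and prefer a single
; ri.vzip2a.vv over the usual widening (vwaddu/vwmaccu) expansion.
define <8 x i32> @interleave_sketch(<4 x i32> %a, <4 x i32> %b) {
  ; Result = a[0], b[0], a[1], b[1], a[2], b[2], a[3], b[3]
  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x i32> %r
}
```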