[RISCV] Add codegen support for ri.vinsert.v.x and ri.vextract.x.v #136708
Conversation
These instructions are included in XRivosVisni. They perform a scalar insert into a vector and a scalar extract from a vector, respectively, each with a potentially non-zero index. They're closely analogous to vmv.s.x and vmv.x.s. The instructions do have a couple of restrictions: 1) only constant indices are supported, with a uimm5 format, and 2) there are no FP variants. One important property of these instructions is that their throughput and latency are expected to be LMUL independent.
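For illustration, the kind of IR this affects and the expected codegen. The extract case is taken from the fixed-vectors-extract.ll updates in this patch; the insert assembly is only a sketch, since those tests are truncated in this view.

define i32 @extractelt_v8i32(<8 x i32> %a) nounwind {
  %b = extractelement <8 x i32> %a, i32 6
  ret i32 %b
}
; With +experimental-xrivosvisni this now selects a single extract instead of a
; vslidedown.vi + vmv.x.s pair:
;   vsetivli zero, 1, e32, m2, ta, ma
;   ri.vextract.x.v a0, v8, 6

define <4 x i32> @insertelt_v4i32(<4 x i32> %a, i32 %x) nounwind {
  %b = insertelement <4 x i32> %a, i32 %x, i32 2
  ret <4 x i32> %b
}
; Expected to select something like ri.vinsert.v.x v8, a0, 2 under a vsetvli
; with VL = index + 1 (the exact operand order and vsetvli operands are
; assumptions, not copied from the tests).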
@llvm/pr-subscribers-backend-risc-v
Author: Philip Reames (preames)
Changes
These instructions are included in XRivosVisni. They perform a scalar insert into a vector and a scalar extract from a vector, respectively, each with a potentially non-zero index. They're closely analogous to vmv.s.x and vmv.x.s. The instructions do have a couple of restrictions:
1) Only constant indices are supported, with a uimm5 format.
2) There are no FP variants.
One important property of these instructions is that their throughput and latency are expected to be LMUL independent. Patch is 62.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/136708.diff 6 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 98fba9e86e88a..30ef1b5d8d209 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6935,7 +6935,7 @@ static bool hasPassthruOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(
- RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 132 &&
+ RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 133 &&
RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
"adding target specific op should update this function");
if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
@@ -6959,7 +6959,7 @@ static bool hasMaskOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(
- RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 132 &&
+ RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 133 &&
RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
"adding target specific op should update this function");
if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
@@ -9567,6 +9567,13 @@ getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
return SmallerVT;
}
+static bool isValidInsertExtractIndex(SDValue Idx) {
+ auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
+ if (!IdxC || isNullConstant(Idx))
+ return false;
+ return IdxC->getZExtValue() < 32;
+}
+
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
@@ -9677,6 +9684,26 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
return Vec;
return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
}
+
+ // Use ri.vinsert.v.x if available.
+ if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
+ isValidInsertExtractIndex(Idx)) {
+ unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
+ if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
+ Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
+ Policy = RISCVVType::TAIL_AGNOSTIC;
+ SDValue PolicyOp =
+ DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
+ Vec = DAG.getNode(RISCVISD::RI_VINSERT_VL, DL, ContainerVT, Vec, Val, Idx,
+ VL, PolicyOp);
+ if (AlignedIdx)
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+ Vec, AlignedIdx);
+ if (!VecVT.isFixedLengthVector())
+ return Vec;
+ return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
+ }
+
ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
} else {
// On RV32, i64-element vectors must be specially handled to place the
@@ -9876,6 +9903,14 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
}
}
+ // Use ri.vextract.x.v if available.
+ // TODO: Avoid index 0 and just use the vmv.x.s
+ if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
+ isValidInsertExtractIndex(Idx)) {
+ SDValue Elt = DAG.getNode(RISCVISD::RI_VEXTRACT, DL, XLenVT, Vec, Idx);
+ return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt);
+ }
+
// If after narrowing, the required slide is still greater than LMUL2,
// fallback to generic expansion and go through the stack. This is done
// for a subtle reason: extracting *all* elements out of a vector is
@@ -22253,11 +22288,13 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VZEXT_VL)
NODE_NAME_CASE(VCPOP_VL)
NODE_NAME_CASE(VFIRST_VL)
+ NODE_NAME_CASE(RI_VINSERT_VL)
NODE_NAME_CASE(RI_VZIPEVEN_VL)
NODE_NAME_CASE(RI_VZIPODD_VL)
NODE_NAME_CASE(RI_VZIP2A_VL)
NODE_NAME_CASE(RI_VUNZIP2A_VL)
NODE_NAME_CASE(RI_VUNZIP2B_VL)
+ NODE_NAME_CASE(RI_VEXTRACT)
NODE_NAME_CASE(READ_CSR)
NODE_NAME_CASE(WRITE_CSR)
NODE_NAME_CASE(SWAP_CSR)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index baf1b2e4d8e6e..871b6b0c53775 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -404,6 +404,10 @@ enum NodeType : unsigned {
// vfirst.m with additional mask and VL operands.
VFIRST_VL,
+ // XRivosVisni
+ // VINSERT matches the semantics of ri.vinsert.v.x. It carries a VL operand.
+ RI_VINSERT_VL,
+
// XRivosVizip
RI_VZIPEVEN_VL,
RI_VZIPODD_VL,
@@ -413,6 +417,12 @@ enum NodeType : unsigned {
LAST_VL_VECTOR_OP = RI_VUNZIP2B_VL,
+ // XRivosVisni
+ // VEXTRACT matches the semantics of ri.vextract.x.v. The result is always
+ // XLenVT sign extended from the vector element size. VEXTRACT does *not*
+ // have a VL operand.
+ RI_VEXTRACT,
+
// Read VLENB CSR
READ_VLENB,
// Reads value of CSR.
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 2247610c21ffb..928b9c1f5b5ba 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -94,6 +94,14 @@ static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
}
}
+static bool isVInsertInstr(const MachineInstr &MI) {
+ return RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::RI_VINSERT;
+}
+
+static bool isVExtractInstr(const MachineInstr &MI) {
+ return RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::RI_VEXTRACT;
+}
+
static bool isScalarExtractInstr(const MachineInstr &MI) {
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
default:
@@ -538,6 +546,18 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
Res.MaskPolicy = false;
}
+ if (isVExtractInstr(MI)) {
+ assert(!RISCVII::hasVLOp(TSFlags));
+ // TODO: LMUL can be any larger value (without cost)
+ Res.TailPolicy = false;
+ Res.MaskPolicy = false;
+ }
+
+ if (isVInsertInstr(MI)) {
+ // TODO: LMUL can be any larger value (without cost)
+ Res.MaskPolicy = false;
+ }
+
return Res;
}
@@ -1085,7 +1105,7 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
}
} else {
- assert(isScalarExtractInstr(MI));
+ assert(isScalarExtractInstr(MI) || isVExtractInstr(MI));
// Pick a random value for state tracking purposes, will be ignored via
// the demanded fields mechanism
InstrInfo.setAVLImm(1);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td
index 147f89850765a..2597c9d3a7067 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td
@@ -125,3 +125,58 @@ def RI_VEXTRACT : CustomRivosXVI<0b010111, OPMVV, (outs GPR:$rd),
(ins VR:$vs2, uimm5:$imm),
"ri.vextract.x.v", "$rd, $vs2, $imm">;
}
+
+
+def ri_vextract : SDNode<"RISCVISD::RI_VEXTRACT",
+ SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<1>,
+ SDTCisInt<2>,
+ SDTCisInt<1>]>>;
+
+def ri_vinsert_vl : SDNode<"RISCVISD::RI_VINSERT_VL",
+ SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
+ SDTCisInt<0>,
+ SDTCisVT<2, XLenVT>,
+ SDTCisVT<3, XLenVT>,
+ SDTCisVT<4, XLenVT>]>>;
+
+
+let Predicates = [HasVendorXRivosVisni], mayLoad = 0, mayStore = 0,
+ hasSideEffects = 0, HasSEWOp = 1 in
+foreach m = MxList in {
+ defvar mx = m.MX;
+ let VLMul = m.value in {
+ let BaseInstr = RI_VEXTRACT in
+ def PseudoRI_VEXTRACT_ # mx :
+ Pseudo<(outs GPR:$rd), (ins m.vrclass:$rs2, uimm6:$idx, ixlenimm:$sew),
+ []>,
+ RISCVVPseudo;
+
+ let HasVLOp = 1, BaseInstr = RI_VINSERT, HasVecPolicyOp = 1,
+ Constraints = "$rd = $rs1" in
+ def PseudoRI_VINSERT_ # mx :
+ Pseudo<(outs m.vrclass:$rd),
+ (ins m.vrclass:$rs1, GPR:$rs2, uimm5:$idx, AVL:$vl,
+ ixlenimm:$sew, ixlenimm:$policy),
+ []>,
+ RISCVVPseudo;
+ }
+}
+
+
+
+foreach vti = AllIntegerVectors in
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(XLenVT (ri_vextract (vti.Vector vti.RegClass:$vs2), uimm5:$imm)),
+ (!cast<Instruction>("PseudoRI_VEXTRACT_" # vti.LMul.MX)
+ $vs2, uimm5:$imm, vti.Log2SEW)>;
+
+ def : Pat<(vti.Vector (ri_vinsert_vl (vti.Vector vti.RegClass:$merge),
+ vti.ScalarRegClass:$rs1,
+ uimm5:$imm,
+ VLOpFrag,
+ (XLenVT timm:$policy))),
+ (!cast<Instruction>("PseudoRI_VINSERT_" # vti.LMul.MX)
+ $merge, vti.ScalarRegClass:$rs1, uimm5:$imm,
+ GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
+
+ }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
index 7e45136372b6c..75732fe2f7e65 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -7,6 +7,8 @@
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32,RV32M
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64,RV64M
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+m,+experimental-xrivosvisni -verify-machineinstrs < %s | FileCheck %s --check-prefixes=VISNI
+
define i8 @extractelt_v16i8(<16 x i8> %a) nounwind {
; CHECK-LABEL: extractelt_v16i8:
; CHECK: # %bb.0:
@@ -14,6 +16,12 @@ define i8 @extractelt_v16i8(<16 x i8> %a) nounwind {
; CHECK-NEXT: vslidedown.vi v8, v8, 7
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v16i8:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 7
+; VISNI-NEXT: ret
%b = extractelement <16 x i8> %a, i32 7
ret i8 %b
}
@@ -25,6 +33,12 @@ define i16 @extractelt_v8i16(<8 x i16> %a) nounwind {
; CHECK-NEXT: vslidedown.vi v8, v8, 7
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v8i16:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 7
+; VISNI-NEXT: ret
%b = extractelement <8 x i16> %a, i32 7
ret i16 %b
}
@@ -36,6 +50,12 @@ define i32 @extractelt_v4i32(<4 x i32> %a) nounwind {
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v4i32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 2
+; VISNI-NEXT: ret
%b = extractelement <4 x i32> %a, i32 2
ret i32 %b
}
@@ -55,6 +75,12 @@ define i64 @extractelt_v2i64(<2 x i64> %a) nounwind {
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v2i64:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; VISNI-NEXT: vmv.x.s a0, v8
+; VISNI-NEXT: ret
%b = extractelement <2 x i64> %a, i32 0
ret i64 %b
}
@@ -67,6 +93,13 @@ define bfloat @extractelt_v8bf16(<8 x bfloat> %a) nounwind {
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: fmv.h.x fa0, a0
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v8bf16:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 7
+; VISNI-NEXT: fmv.h.x fa0, a0
+; VISNI-NEXT: ret
%b = extractelement <8 x bfloat> %a, i32 7
ret bfloat %b
}
@@ -86,6 +119,13 @@ define half @extractelt_v8f16(<8 x half> %a) nounwind {
; ZVFHMIN-NEXT: vmv.x.s a0, v8
; ZVFHMIN-NEXT: fmv.h.x fa0, a0
; ZVFHMIN-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v8f16:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; VISNI-NEXT: vslidedown.vi v8, v8, 7
+; VISNI-NEXT: vfmv.f.s fa0, v8
+; VISNI-NEXT: ret
%b = extractelement <8 x half> %a, i32 7
ret half %b
}
@@ -97,6 +137,13 @@ define float @extractelt_v4f32(<4 x float> %a) nounwind {
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v4f32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: vslidedown.vi v8, v8, 2
+; VISNI-NEXT: vfmv.f.s fa0, v8
+; VISNI-NEXT: ret
%b = extractelement <4 x float> %a, i32 2
ret float %b
}
@@ -107,6 +154,12 @@ define double @extractelt_v2f64(<2 x double> %a) nounwind {
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v2f64:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; VISNI-NEXT: vfmv.f.s fa0, v8
+; VISNI-NEXT: ret
%b = extractelement <2 x double> %a, i32 0
ret double %b
}
@@ -118,6 +171,12 @@ define i8 @extractelt_v32i8(<32 x i8> %a) nounwind {
; CHECK-NEXT: vslidedown.vi v8, v8, 7
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v32i8:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 7
+; VISNI-NEXT: ret
%b = extractelement <32 x i8> %a, i32 7
ret i8 %b
}
@@ -129,6 +188,12 @@ define i16 @extractelt_v16i16(<16 x i16> %a) nounwind {
; CHECK-NEXT: vslidedown.vi v8, v8, 7
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v16i16:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 7
+; VISNI-NEXT: ret
%b = extractelement <16 x i16> %a, i32 7
ret i16 %b
}
@@ -140,6 +205,12 @@ define i32 @extractelt_v8i32(<8 x i32> %a) nounwind {
; CHECK-NEXT: vslidedown.vi v8, v8, 6
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v8i32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 6
+; VISNI-NEXT: ret
%b = extractelement <8 x i32> %a, i32 6
ret i32 %b
}
@@ -161,6 +232,12 @@ define i64 @extractelt_v4i64(<4 x i64> %a) nounwind {
; RV64-NEXT: vslidedown.vi v8, v8, 3
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v4i64:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 3
+; VISNI-NEXT: ret
%b = extractelement <4 x i64> %a, i32 3
ret i64 %b
}
@@ -173,6 +250,13 @@ define bfloat @extractelt_v16bf16(<16 x bfloat> %a) nounwind {
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: fmv.h.x fa0, a0
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v16bf16:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 7
+; VISNI-NEXT: fmv.h.x fa0, a0
+; VISNI-NEXT: ret
%b = extractelement <16 x bfloat> %a, i32 7
ret bfloat %b
}
@@ -192,6 +276,13 @@ define half @extractelt_v16f16(<16 x half> %a) nounwind {
; ZVFHMIN-NEXT: vmv.x.s a0, v8
; ZVFHMIN-NEXT: fmv.h.x fa0, a0
; ZVFHMIN-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v16f16:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; VISNI-NEXT: vslidedown.vi v8, v8, 7
+; VISNI-NEXT: vfmv.f.s fa0, v8
+; VISNI-NEXT: ret
%b = extractelement <16 x half> %a, i32 7
ret half %b
}
@@ -203,6 +294,13 @@ define float @extractelt_v8f32(<8 x float> %a) nounwind {
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v8f32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: vslidedown.vi v8, v8, 2
+; VISNI-NEXT: vfmv.f.s fa0, v8
+; VISNI-NEXT: ret
%b = extractelement <8 x float> %a, i32 2
ret float %b
}
@@ -213,6 +311,12 @@ define double @extractelt_v4f64(<4 x double> %a) nounwind {
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v4f64:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; VISNI-NEXT: vfmv.f.s fa0, v8
+; VISNI-NEXT: ret
%b = extractelement <4 x double> %a, i32 0
ret double %b
}
@@ -237,6 +341,12 @@ define i64 @extractelt_v3i64(<3 x i64> %a) nounwind {
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v3i64:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 2
+; VISNI-NEXT: ret
%b = extractelement <3 x i64> %a, i32 2
ret i64 %b
}
@@ -278,6 +388,12 @@ define i32 @extractelt_v32i32(<32 x i32> %a) nounwind {
; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 256
; RV64-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v32i32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m8, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 31
+; VISNI-NEXT: ret
%b = extractelement <32 x i32> %a, i32 31
ret i32 %b
}
@@ -319,6 +435,12 @@ define i32 @extractelt_v64i32(<64 x i32> %a) nounwind {
; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 256
; RV64-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v64i32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m8, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v16, 31
+; VISNI-NEXT: ret
%b = extractelement <64 x i32> %a, i32 63
ret i32 %b
}
@@ -330,6 +452,13 @@ define i8 @extractelt_v16i8_idx(<16 x i8> %a, i32 zeroext %idx) nounwind {
; CHECK-NEXT: vslidedown.vx v8, v8, a0
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v16i8_idx:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; VISNI-NEXT: vslidedown.vx v8, v8, a0
+; VISNI-NEXT: vmv.x.s a0, v8
+; VISNI-NEXT: ret
%b = extractelement <16 x i8> %a, i32 %idx
ret i8 %b
}
@@ -341,6 +470,13 @@ define i16 @extractelt_v8i16_idx(<8 x i16> %a, i32 zeroext %idx) nounwind {
; CHECK-NEXT: vslidedown.vx v8, v8, a0
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v8i16_idx:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; VISNI-NEXT: vslidedown.vx v8, v8, a0
+; VISNI-NEXT: vmv.x.s a0, v8
+; VISNI-NEXT: ret
%b = extractelement <8 x i16> %a, i32 %idx
ret i16 %b
}
@@ -353,6 +489,14 @@ define i32 @extractelt_v4i32_idx(<4 x i32> %a, i32 zeroext %idx) nounwind {
; CHECK-NEXT: vslidedown.vx v8, v8, a0
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v4i32_idx:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; VISNI-NEXT: vadd.vv v8, v8, v8
+; VISNI-NEXT: vslidedown.vx v8, v8, a0
+; VISNI-NEXT: vmv.x.s a0, v8
+; VISNI-NEXT: ret
%b = add <4 x i32> %a, %a
%c = extractelement <4 x i32> %b, i32 %idx
ret i32 %c
@@ -378,6 +522,14 @@ define i64 @extractelt_v2i64_idx(<2 x i64> %a, i32 zeroext %idx) nounwind {
; RV64-NEXT: vslidedown.vx v8, v8, a0
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v2i64_idx:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; VISNI-NEXT: vadd.vv v8, v8, v8
+; VISNI-NEXT: vslide...
[truncated]
✅ With the latest revision this PR passed the C/C++ code formatter.
preames left a comment
A bit of context for reviewers on the upcoming patches in this area. This is the first in a series, though at the moment it's quite a small one. I've got a buildvector lowering change (very small), I want to rework the extract path to use vmv.x.s for index zero, and I have one possible bugfix that I need to either extract a test for or convince myself is no longer relevant.
I might later explore the fold memory operand and rematerialization hooks, but that'll be a bit down the line.
SDTCisVT<4, XLenVT>]>>;

let Predicates = [HasVendorXRivosVisni], mayLoad = 0, mayStore = 0,
I'm not thrilled with this bit of tablegen; if anyone has suggestions on how to improve it, please let me know!
It looks fine to me; is there anything in particular that you're worried about?
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32,RV32M
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64,RV64M
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+m,+experimental-xrivosvisni -verify-machineinstrs < %s | FileCheck %s --check-prefixes=VISNI
Having a completely separate check line here seemed like the least bad choice. If if I try to reuse CHECK, most of the other tests fork. I could add a NO-VISNI intermediate state, but that seemed ugly too. Happy to defer to reviewer preference here.
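For reference, the rejected alternative would look roughly like the following (the NOVISNI prefix is hypothetical, not part of the patch):

; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NOVISNI
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m,+experimental-xrivosvisni -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VISNI

Every test whose output changes under XRivosVisni would then need paired NOVISNI/VISNI check blocks, which is the forking described above.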
lukel97 left a comment
LGTM
if (isVExtractInstr(MI)) {
  assert(!RISCVII::hasVLOp(TSFlags));
  // TODO: LMUL can be any larger value (without cost)
This would be an interesting optimisation. I could imagine us having more demanded LMUL types, e.g. Res.LMUL = Res.LMULGreaterThanOrEqualToMX.
In a sense, we sort of already have this for vmv.s.x and vmv.x.s; it's just that the MX value is the smallest legal LMUL, and thus GTE becomes any LMUL.
if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
    Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
  Policy = RISCVVType::TAIL_AGNOSTIC;
I'm aware that this is copied from the existing case below, but if the VL here is always going to be >= VLMAX, then can we not always set RISCVVType::TAIL_AGNOSTIC?
VL is always going to be <= VLMAX, not the other way around. But yes, you're correct that we can set TA in all cases here. I'll do that before landing.
Edit: For clarity, this depends on a specification clarification, not yet pushed, of which elements are tail elements for this instruction. In short, the tail is the elements past VLMAX (since we allow writes past VL), not the usual elements past VL. This means that only fractional LMULs have tail elements.
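To make the policy discussion concrete, a small sketch; the comments describe assumed behaviour based on this thread, not output copied from the truncated tests.

define <4 x i32> @insert_last(<4 x i32> %a, i32 %x) nounwind {
  ; Inserting the last element of the fixed vector: the lowering above already
  ; picks TAIL_AGNOSTIC for this case.
  %b = insertelement <4 x i32> %a, i32 %x, i32 3
  ret <4 x i32> %b
}

define <4 x i32> @insert_mid(<4 x i32> %a, i32 %x) nounwind {
  ; Inserting a middle element: currently TAIL_UNDISTURBED, but per the reply
  ; above TAIL_AGNOSTIC is also correct once the spec clarification (tail =
  ; elements past VLMAX) lands, since only fractional LMULs have tail elements.
  %b = insertelement <4 x i32> %a, i32 %x, i32 1
  ret <4 x i32> %b
}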