
Commit 62c4ac7

[RISCV] Optimize splats of extracted vector elements
This patch adds an optimization for splat-like operations where the splatted value is extracted from an identically-sized vector. On RVV we can splat that via vrgather.vx/vrgather.vi without dropping to scalar beforehand.

We do have a similar VECTOR_SHUFFLE-specific optimization, but it only works on fixed-length vector types and only for constant splat lanes. This patch extends that optimization to scalable-vector types and to unknown extract indices.

The optimization is performed during fixed-vector BUILD_VECTOR lowering and during a new DAGCombine on SPLAT_VECTOR for scalable vectors.

Reviewed By: craig.topper, khchen

Differential Revision: https://reviews.llvm.org/D118456
1 parent 215aba7 commit 62c4ac7
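
For illustration, the pattern this patch targets is the splat-of-extract idiom exercised by the splat_idx_* tests updated below. A minimal fixed-length sketch, modelled on splat_idx_v4i32 from fixed-vectors-int-buildvec.ll (the trailing shufflevector and ret are assumed here from the standard splat idiom; the truncated diff context below only shows the first two instructions):

define <4 x i32> @splat_idx_v4i32(<4 x i32> %v, i64 %idx) {
  ; Extract one element at a possibly non-constant index...
  %x = extractelement <4 x i32> %v, i64 %idx
  ; ...and broadcast it to every lane via the usual insert+shuffle splat idiom
  ; (assumed shape; not shown verbatim in the truncated test diff below).
  %ins = insertelement <4 x i32> poison, i32 %x, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}

Previously this went through the scalar domain (vslidedown.vx + vmv.x.s + vmv.v.x); with this patch the updated CHECK lines expect a single vrgather.vx instead.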

4 files changed: +75 −52 lines

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 47 additions & 0 deletions
@@ -1086,6 +1086,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setTargetDAGCombine(ISD::SRL);
     setTargetDAGCombine(ISD::SHL);
     setTargetDAGCombine(ISD::STORE);
+    setTargetDAGCombine(ISD::SPLAT_VECTOR);
   }
 
   setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
@@ -2000,6 +2001,40 @@ static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
   return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
 }
 
+// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
+// and lower it as a VRGATHER_VX_VL from the source vector.
+static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
+                                  SelectionDAG &DAG,
+                                  const RISCVSubtarget &Subtarget) {
+  if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+    return SDValue();
+  SDValue Vec = SplatVal.getOperand(0);
+  // Only perform this optimization on vectors of the same size for simplicity.
+  if (Vec.getValueType() != VT)
+    return SDValue();
+  SDValue Idx = SplatVal.getOperand(1);
+  // The index must be a legal type.
+  if (Idx.getValueType() != Subtarget.getXLenVT())
+    return SDValue();
+
+  MVT ContainerVT = VT;
+  if (VT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+  }
+
+  SDValue Mask, VL;
+  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+  SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
+                               Idx, Mask, VL);
+
+  if (!VT.isFixedLengthVector())
+    return Gather;
+
+  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
+}
+
 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
   MVT VT = Op.getSimpleValueType();
@@ -2123,6 +2158,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   }
 
   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
+    if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
+      return Gather;
     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                         : RISCVISD::VMV_V_X_VL;
     Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
@@ -8260,6 +8297,16 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
 
     break;
   }
+  case ISD::SPLAT_VECTOR: {
+    EVT VT = N->getValueType(0);
+    // Only perform this combine on legal MVT types.
+    if (!isTypeLegal(VT))
+      break;
+    if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
+                                         DAG, Subtarget))
+      return Gather;
+    break;
+  }
   }
 
   return SDValue();
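
The new ISD::SPLAT_VECTOR combine above covers the scalable-vector form of the same idiom. A minimal sketch, modelled on splat_idx_nxv4i32 from splat-vectors.ll (again, the shufflevector and ret lines are assumed from the standard splat idiom; the diff context below truncates them):

define <vscale x 4 x i32> @splat_idx_nxv4i32(<vscale x 4 x i32> %v, i64 %idx) {
  ; Splat of an element extracted at a runtime index from an identically-sized
  ; scalable vector; the SPLAT_VECTOR combine now matches this as a gather.
  %x = extractelement <vscale x 4 x i32> %v, i64 %idx
  %ins = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
  %splat = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i32> %splat
}

Per the splat-vectors.ll changes below, this now selects to vsetvli + vrgather.vx + vmv.v.v rather than dropping to a scalar register first.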

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

Lines changed: 4 additions & 8 deletions
@@ -217,11 +217,9 @@ define <4 x half> @splat_c3_v4f16(<4 x half> %v) {
 define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) {
 ; CHECK-LABEL: splat_idx_v4f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vfmv.f.s ft0, v8
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT: vfmv.v.f v8, ft0
+; CHECK-NEXT: vrgather.vx v9, v8, a0
+; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: ret
   %x = extractelement <4 x half> %v, i64 %idx
   %ins = insertelement <4 x half> poison, half %x, i32 0
@@ -270,11 +268,9 @@ define <8 x float> @splat_idx_v8f32(<8 x float> %v, i64 %idx) {
 ;
 ; LMULMAX2-LABEL: splat_idx_v8f32:
 ; LMULMAX2: # %bb.0:
-; LMULMAX2-NEXT: vsetivli zero, 1, e32, m2, ta, mu
-; LMULMAX2-NEXT: vslidedown.vx v8, v8, a0
-; LMULMAX2-NEXT: vfmv.f.s ft0, v8
 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX2-NEXT: vfmv.v.f v8, ft0
+; LMULMAX2-NEXT: vrgather.vx v10, v8, a0
+; LMULMAX2-NEXT: vmv.v.v v8, v10
 ; LMULMAX2-NEXT: ret
   %x = extractelement <8 x float> %v, i64 %idx
   %ins = insertelement <8 x float> poison, float %x, i32 0

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll

Lines changed: 4 additions & 8 deletions
@@ -665,11 +665,9 @@ define <4 x i32> @splat_c3_v4i32(<4 x i32> %v) {
 define <4 x i32> @splat_idx_v4i32(<4 x i32> %v, i64 %idx) {
 ; CHECK-LABEL: splat_idx_v4i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vrgather.vx v9, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v9
 ; CHECK-NEXT: ret
   %x = extractelement <4 x i32> %v, i64 %idx
   %ins = insertelement <4 x i32> poison, i32 %x, i32 0
@@ -693,11 +691,9 @@ define <8 x i16> @splat_c4_v8i16(<8 x i16> %v) {
 define <8 x i16> @splat_idx_v8i16(<8 x i16> %v, i64 %idx) {
 ; CHECK-LABEL: splat_idx_v8i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vrgather.vx v9, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v9
 ; CHECK-NEXT: ret
   %x = extractelement <8 x i16> %v, i64 %idx
   %ins = insertelement <8 x i16> poison, i16 %x, i32 0

llvm/test/CodeGen/RISCV/rvv/splat-vectors.ll

Lines changed: 20 additions & 36 deletions
@@ -5,11 +5,9 @@
 define <vscale x 4 x i32> @splat_c3_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: splat_c3_nxv4i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu
-; CHECK-NEXT: vslidedown.vi v8, v8, 3
-; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT: vrgather.vi v10, v8, 3
+; CHECK-NEXT: vmv.v.v v8, v10
 ; CHECK-NEXT: ret
   %x = extractelement <vscale x 4 x i32> %v, i32 3
   %ins = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
@@ -20,11 +18,9 @@ define <vscale x 4 x i32> @splat_c3_nxv4i32(<vscale x 4 x i32> %v) {
 define <vscale x 4 x i32> @splat_idx_nxv4i32(<vscale x 4 x i32> %v, i64 %idx) {
 ; CHECK-LABEL: splat_idx_nxv4i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vrgather.vx v10, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v10
 ; CHECK-NEXT: ret
   %x = extractelement <vscale x 4 x i32> %v, i64 %idx
   %ins = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
@@ -35,11 +31,9 @@ define <vscale x 4 x i32> @splat_idx_nxv4i32(<vscale x 4 x i32> %v, i64 %idx) {
 define <vscale x 8 x i16> @splat_c4_nxv8i16(<vscale x 8 x i16> %v) {
 ; CHECK-LABEL: splat_c4_nxv8i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, mu
-; CHECK-NEXT: vslidedown.vi v8, v8, 4
-; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu
+; CHECK-NEXT: vrgather.vi v10, v8, 4
+; CHECK-NEXT: vmv.v.v v8, v10
 ; CHECK-NEXT: ret
   %x = extractelement <vscale x 8 x i16> %v, i32 4
   %ins = insertelement <vscale x 8 x i16> poison, i16 %x, i32 0
@@ -50,11 +44,9 @@ define <vscale x 8 x i16> @splat_c4_nxv8i16(<vscale x 8 x i16> %v) {
 define <vscale x 8 x i16> @splat_idx_nxv8i16(<vscale x 8 x i16> %v, i64 %idx) {
 ; CHECK-LABEL: splat_idx_nxv8i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, mu
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vrgather.vx v10, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v10
 ; CHECK-NEXT: ret
   %x = extractelement <vscale x 8 x i16> %v, i64 %idx
   %ins = insertelement <vscale x 8 x i16> poison, i16 %x, i32 0
@@ -65,11 +57,9 @@ define <vscale x 8 x i16> @splat_idx_nxv8i16(<vscale x 8 x i16> %v, i64 %idx) {
 define <vscale x 2 x half> @splat_c1_nxv2f16(<vscale x 2 x half> %v) {
 ; CHECK-LABEL: splat_c1_nxv2f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vfmv.f.s ft0, v8
 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vfmv.v.f v8, ft0
+; CHECK-NEXT: vrgather.vi v9, v8, 1
+; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: ret
   %x = extractelement <vscale x 2 x half> %v, i32 1
   %ins = insertelement <vscale x 2 x half> poison, half %x, i32 0
@@ -80,11 +70,9 @@ define <vscale x 2 x half> @splat_c1_nxv2f16(<vscale x 2 x half> %v) {
 define <vscale x 2 x half> @splat_idx_nxv2f16(<vscale x 2 x half> %v, i64 %idx) {
 ; CHECK-LABEL: splat_idx_nxv2f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vfmv.f.s ft0, v8
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vfmv.v.f v8, ft0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu
+; CHECK-NEXT: vrgather.vx v9, v8, a0
+; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: ret
   %x = extractelement <vscale x 2 x half> %v, i64 %idx
   %ins = insertelement <vscale x 2 x half> poison, half %x, i32 0
@@ -95,11 +83,9 @@ define <vscale x 2 x half> @splat_idx_nxv2f16(<vscale x 2 x half> %v, i64 %idx)
 define <vscale x 4 x float> @splat_c3_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK-LABEL: splat_c3_nxv4f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu
-; CHECK-NEXT: vslidedown.vi v8, v8, 3
-; CHECK-NEXT: vfmv.f.s ft0, v8
 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT: vfmv.v.f v8, ft0
+; CHECK-NEXT: vrgather.vi v10, v8, 3
+; CHECK-NEXT: vmv.v.v v8, v10
 ; CHECK-NEXT: ret
   %x = extractelement <vscale x 4 x float> %v, i64 3
   %ins = insertelement <vscale x 4 x float> poison, float %x, i32 0
@@ -110,11 +96,9 @@ define <vscale x 4 x float> @splat_c3_nxv4f32(<vscale x 4 x float> %v) {
 define <vscale x 4 x float> @splat_idx_nxv4f32(<vscale x 4 x float> %v, i64 %idx) {
 ; CHECK-LABEL: splat_idx_nxv4f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vfmv.f.s ft0, v8
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT: vfmv.v.f v8, ft0
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu
+; CHECK-NEXT: vrgather.vx v10, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v10
 ; CHECK-NEXT: ret
   %x = extractelement <vscale x 4 x float> %v, i64 %idx
   %ins = insertelement <vscale x 4 x float> poison, float %x, i32 0
