Skip to content

Commit 984b217

Browse files
frasercrmck
authored and memfrob committed
[RISCV] Pass undef VECTOR_SHUFFLE indices on to BUILD_VECTOR
Often when lowering vector shuffles, we split the shuffle into two LHS/RHS shuffles which are then blended together. To do so we split the original indices into two, indexed into each respective vector. These two index vectors are then separately lowered as BUILD_VECTORs. This patch forwards on any undef indices to the BUILD_VECTOR, rather than having the VECTOR_SHUFFLE lowering decide on an optimal concrete index. The motivation for this change is so that we don't duplicate optimization logic between the two lowering methods and let BUILD_VECTOR do what it does best. Propagating undef in this way allows us, for example, to generate `vid.v` to produce the LHS indices of commonly-used interleave-type shuffles. I have designs on further optimizing interleave-type and other common shuffle patterns in the near future. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D104789
1 parent dcec3ea commit 984b217

File tree

6 files changed

+160
-147
lines changed

6 files changed

+160
-147
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1826,14 +1826,13 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
18261826
bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
18271827
MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
18281828
if (!IsSelect) {
1829-
bool IsLHS = MaskIndex < (int)NumElts;
1830-
// For "undef" elements of -1, shuffle in element 0 instead.
1831-
GatherIndicesLHS.push_back(
1832-
DAG.getConstant(IsLHS ? std::max(MaskIndex, 0) : 0, DL, XLenVT));
1833-
// TODO: If we're masking out unused elements anyway, it might produce
1834-
// better code if we use the most-common element index instead of 0.
1829+
bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
1830+
GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
1831+
? DAG.getConstant(MaskIndex, DL, XLenVT)
1832+
: DAG.getUNDEF(XLenVT));
18351833
GatherIndicesRHS.push_back(
1836-
DAG.getConstant(IsLHS ? 0 : MaskIndex - NumElts, DL, XLenVT));
1834+
IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
1835+
: DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
18371836
}
18381837
}
18391838

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s
3+
4+
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
5+
target triple = "riscv64-unknown-unknown-elf"
6+
7+
define dso_local <16 x i16> @interleave(<8 x i16> %v0, <8 x i16> %v1) {
8+
; CHECK-LABEL: interleave:
9+
; CHECK: # %bb.0: # %entry
10+
; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v8m2 def $v8m2
11+
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
12+
; CHECK-NEXT: vmv.v.i v26, 0
13+
; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, mu
14+
; CHECK-NEXT: vmv2r.v v28, v26
15+
; CHECK-NEXT: vslideup.vi v28, v8, 0
16+
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
17+
; CHECK-NEXT: vmv.v.i v30, 0
18+
; CHECK-NEXT: vsetivli zero, 16, e16, m2, tu, mu
19+
; CHECK-NEXT: vslideup.vi v28, v30, 8
20+
; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
21+
; CHECK-NEXT: addi a0, a0, %lo(.LCPI0_0)
22+
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
23+
; CHECK-NEXT: vle16.v v10, (a0)
24+
; CHECK-NEXT: vmv1r.v v8, v9
25+
; CHECK-NEXT: vrgather.vv v12, v28, v10
26+
; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, mu
27+
; CHECK-NEXT: vslideup.vi v26, v8, 0
28+
; CHECK-NEXT: vsetivli zero, 16, e16, m2, tu, mu
29+
; CHECK-NEXT: vslideup.vi v26, v30, 8
30+
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
31+
; CHECK-NEXT: vid.v v28
32+
; CHECK-NEXT: vrgather.vv v8, v12, v28
33+
; CHECK-NEXT: lui a0, 11
34+
; CHECK-NEXT: addiw a0, a0, -1366
35+
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
36+
; CHECK-NEXT: vmv.s.x v0, a0
37+
; CHECK-NEXT: lui a0, %hi(.LCPI0_1)
38+
; CHECK-NEXT: addi a0, a0, %lo(.LCPI0_1)
39+
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
40+
; CHECK-NEXT: vle16.v v28, (a0)
41+
; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu
42+
; CHECK-NEXT: vrgather.vv v8, v26, v28, v0.t
43+
; CHECK-NEXT: ret
44+
entry:
45+
%v2 = shufflevector <8 x i16> %v0, <8 x i16> poison, <16 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef>
46+
%v3 = shufflevector <8 x i16> %v1, <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
47+
%v4 = shufflevector <16 x i16> %v2, <16 x i16> %v3, <16 x i32> <i32 0, i32 16, i32 2, i32 17, i32 4, i32 18, i32 6, i32 19, i32 8, i32 20, i32 10, i32 21, i32 12, i32 22, i32 14, i32 23>
48+
ret <16 x i16> %v4
49+
}

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -63,28 +63,22 @@ define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x,
6363
; LMULMAX2-NEXT: addi a0, zero, 2
6464
; LMULMAX2-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
6565
; LMULMAX2-NEXT: vmv.s.x v0, a0
66-
; LMULMAX2-NEXT: lui a0, %hi(.LCPI1_0)
67-
; LMULMAX2-NEXT: addi a0, a0, %lo(.LCPI1_0)
6866
; LMULMAX2-NEXT: vsetivli zero, 4, e32, m1, ta, mu
69-
; LMULMAX2-NEXT: vle32.v v27, (a0)
67+
; LMULMAX2-NEXT: vmv.v.i v27, 3
7068
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m1, tu, mu
7169
; LMULMAX2-NEXT: vrgather.vv v26, v9, v27, v0.t
7270
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m1, ta, mu
73-
; LMULMAX2-NEXT: vrgather.vv v27, v10, v25
71+
; LMULMAX2-NEXT: vrgather.vv v28, v10, v25
7472
; LMULMAX2-NEXT: addi a0, zero, 8
7573
; LMULMAX2-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
7674
; LMULMAX2-NEXT: vmv.s.x v0, a0
77-
; LMULMAX2-NEXT: lui a0, %hi(.LCPI1_1)
78-
; LMULMAX2-NEXT: addi a0, a0, %lo(.LCPI1_1)
79-
; LMULMAX2-NEXT: vsetivli zero, 4, e32, m1, ta, mu
80-
; LMULMAX2-NEXT: vle32.v v25, (a0)
81-
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m1, tu, mu
82-
; LMULMAX2-NEXT: vrgather.vv v27, v11, v25, v0.t
75+
; LMULMAX2-NEXT: vsetivli zero, 4, e32, m1, tu, mu
76+
; LMULMAX2-NEXT: vrgather.vv v28, v11, v27, v0.t
8377
; LMULMAX2-NEXT: addi a0, zero, 3
8478
; LMULMAX2-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
8579
; LMULMAX2-NEXT: vmv.s.x v0, a0
8680
; LMULMAX2-NEXT: vsetivli zero, 4, e32, m1, ta, mu
87-
; LMULMAX2-NEXT: vmerge.vvm v8, v27, v26, v0
81+
; LMULMAX2-NEXT: vmerge.vvm v8, v28, v26, v0
8882
; LMULMAX2-NEXT: ret
8983
%z = shufflevector <8 x float> %x, <8 x float> %y, <4 x i32> <i32 0, i32 7, i32 8, i32 15>
9084
ret <4 x float> %z

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll

Lines changed: 32 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -133,43 +133,35 @@ define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) {
133133
define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) {
134134
; RV32-LABEL: vrgather_shuffle_vv_v4f64:
135135
; RV32: # %bb.0:
136-
; RV32-NEXT: addi a0, zero, 1
137-
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
138-
; RV32-NEXT: vmv.s.x v25, a0
139-
; RV32-NEXT: vmv.v.i v28, 0
140-
; RV32-NEXT: vsetvli zero, zero, e16, mf2, tu, mu
141-
; RV32-NEXT: vslideup.vi v28, v25, 3
142136
; RV32-NEXT: lui a0, %hi(.LCPI6_0)
143137
; RV32-NEXT: addi a0, a0, %lo(.LCPI6_0)
144-
; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
138+
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
145139
; RV32-NEXT: vle16.v v25, (a0)
146140
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu
147141
; RV32-NEXT: vrgatherei16.vv v26, v8, v25
148142
; RV32-NEXT: addi a0, zero, 8
149143
; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
150144
; RV32-NEXT: vmv.s.x v0, a0
151-
; RV32-NEXT: vsetivli zero, 4, e64, m2, tu, mu
152-
; RV32-NEXT: vrgatherei16.vv v26, v10, v28, v0.t
145+
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
146+
; RV32-NEXT: vmv.v.i v25, 1
147+
; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu
148+
; RV32-NEXT: vrgatherei16.vv v26, v10, v25, v0.t
153149
; RV32-NEXT: vmv2r.v v8, v26
154150
; RV32-NEXT: ret
155151
;
156152
; RV64-LABEL: vrgather_shuffle_vv_v4f64:
157153
; RV64: # %bb.0:
158-
; RV64-NEXT: addi a0, zero, 1
159-
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
160-
; RV64-NEXT: vmv.s.x v26, a0
161-
; RV64-NEXT: vmv.v.i v28, 0
162-
; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, mu
163-
; RV64-NEXT: vslideup.vi v28, v26, 3
164154
; RV64-NEXT: lui a0, %hi(.LCPI6_0)
165155
; RV64-NEXT: addi a0, a0, %lo(.LCPI6_0)
166-
; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu
167-
; RV64-NEXT: vle64.v v30, (a0)
168-
; RV64-NEXT: vrgather.vv v26, v8, v30
156+
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
157+
; RV64-NEXT: vle64.v v28, (a0)
158+
; RV64-NEXT: vrgather.vv v26, v8, v28
169159
; RV64-NEXT: addi a0, zero, 8
170160
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
171161
; RV64-NEXT: vmv.s.x v0, a0
172-
; RV64-NEXT: vsetivli zero, 4, e64, m2, tu, mu
162+
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
163+
; RV64-NEXT: vmv.v.i v28, 1
164+
; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, mu
173165
; RV64-NEXT: vrgather.vv v26, v10, v28, v0.t
174166
; RV64-NEXT: vmv2r.v v8, v26
175167
; RV64-NEXT: ret
@@ -185,28 +177,31 @@ define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) {
185177
; RV32-NEXT: vmv.s.x v0, a0
186178
; RV32-NEXT: lui a0, %hi(.LCPI7_0)
187179
; RV32-NEXT: addi a0, a0, %lo(.LCPI7_0)
188-
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
189-
; RV32-NEXT: vle16.v v25, (a0)
190-
; RV32-NEXT: lui a0, %hi(.LCPI7_1)
191-
; RV32-NEXT: addi a0, a0, %lo(.LCPI7_1)
192-
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu
180+
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu
193181
; RV32-NEXT: vlse64.v v26, (a0), zero
194-
; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu
182+
; RV32-NEXT: lui a0, 16
183+
; RV32-NEXT: addi a0, a0, 2
184+
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
185+
; RV32-NEXT: vmv.v.x v25, a0
186+
; RV32-NEXT: vsetivli zero, 4, e64, m2, tu, mu
195187
; RV32-NEXT: vrgatherei16.vv v26, v8, v25, v0.t
196188
; RV32-NEXT: vmv2r.v v8, v26
197189
; RV32-NEXT: ret
198190
;
199191
; RV64-LABEL: vrgather_shuffle_xv_v4f64:
200192
; RV64: # %bb.0:
193+
; RV64-NEXT: addi a0, zero, 2
194+
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
195+
; RV64-NEXT: vmv.s.x v26, a0
196+
; RV64-NEXT: vmv.v.i v28, 1
197+
; RV64-NEXT: vsetivli zero, 3, e64, m2, tu, mu
198+
; RV64-NEXT: vslideup.vi v28, v26, 2
201199
; RV64-NEXT: addi a0, zero, 12
202200
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
203201
; RV64-NEXT: vmv.s.x v0, a0
204202
; RV64-NEXT: lui a0, %hi(.LCPI7_0)
205203
; RV64-NEXT: addi a0, a0, %lo(.LCPI7_0)
206204
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
207-
; RV64-NEXT: vle64.v v28, (a0)
208-
; RV64-NEXT: lui a0, %hi(.LCPI7_1)
209-
; RV64-NEXT: addi a0, a0, %lo(.LCPI7_1)
210205
; RV64-NEXT: vlse64.v v26, (a0), zero
211206
; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, mu
212207
; RV64-NEXT: vrgather.vv v26, v8, v28, v0.t
@@ -220,30 +215,27 @@ define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) {
220215
; RV32-LABEL: vrgather_shuffle_vx_v4f64:
221216
; RV32: # %bb.0:
222217
; RV32-NEXT: addi a0, zero, 3
223-
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
224-
; RV32-NEXT: vmv.s.x v25, a0
225-
; RV32-NEXT: vmv.v.i v28, 0
226-
; RV32-NEXT: vsetivli zero, 2, e16, mf2, tu, mu
227-
; RV32-NEXT: vslideup.vi v28, v25, 1
228218
; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
229219
; RV32-NEXT: vmv.s.x v0, a0
230220
; RV32-NEXT: lui a0, %hi(.LCPI8_0)
231221
; RV32-NEXT: addi a0, a0, %lo(.LCPI8_0)
232222
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu
233223
; RV32-NEXT: vlse64.v v26, (a0), zero
234-
; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu
235-
; RV32-NEXT: vrgatherei16.vv v26, v8, v28, v0.t
224+
; RV32-NEXT: lui a0, 48
225+
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
226+
; RV32-NEXT: vmv.v.x v25, a0
227+
; RV32-NEXT: vsetivli zero, 4, e64, m2, tu, mu
228+
; RV32-NEXT: vrgatherei16.vv v26, v8, v25, v0.t
236229
; RV32-NEXT: vmv2r.v v8, v26
237230
; RV32-NEXT: ret
238231
;
239232
; RV64-LABEL: vrgather_shuffle_vx_v4f64:
240233
; RV64: # %bb.0:
241-
; RV64-NEXT: addi a0, zero, 3
242234
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
243-
; RV64-NEXT: vmv.s.x v26, a0
244-
; RV64-NEXT: vmv.v.i v28, 0
245-
; RV64-NEXT: vsetivli zero, 2, e64, m2, tu, mu
246-
; RV64-NEXT: vslideup.vi v28, v26, 1
235+
; RV64-NEXT: vmv.v.i v28, 3
236+
; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, mu
237+
; RV64-NEXT: vmv.s.x v28, zero
238+
; RV64-NEXT: addi a0, zero, 3
247239
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
248240
; RV64-NEXT: vmv.s.x v0, a0
249241
; RV64-NEXT: lui a0, %hi(.LCPI8_0)

0 commit comments

Comments
 (0)